{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999820939352159, "eval_steps": 500, "global_step": 27923, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.581212956828478e-05, "grad_norm": 10.962271690368652, "learning_rate": 2.386634844868735e-07, "loss": 3.4992, "step": 1 }, { "epoch": 7.162425913656956e-05, "grad_norm": 27.797842025756836, "learning_rate": 4.77326968973747e-07, "loss": 3.1642, "step": 2 }, { "epoch": 0.00010743638870485433, "grad_norm": 9.58411979675293, "learning_rate": 7.159904534606206e-07, "loss": 3.0653, "step": 3 }, { "epoch": 0.00014324851827313912, "grad_norm": 20.217010498046875, "learning_rate": 9.54653937947494e-07, "loss": 2.3841, "step": 4 }, { "epoch": 0.00017906064784142388, "grad_norm": 7.880495071411133, "learning_rate": 1.1933174224343676e-06, "loss": 2.9288, "step": 5 }, { "epoch": 0.00021487277740970867, "grad_norm": 51.29137420654297, "learning_rate": 1.4319809069212413e-06, "loss": 3.7625, "step": 6 }, { "epoch": 0.00025068490697799346, "grad_norm": 8.99178409576416, "learning_rate": 1.6706443914081146e-06, "loss": 3.156, "step": 7 }, { "epoch": 0.00028649703654627824, "grad_norm": 7.409815788269043, "learning_rate": 1.909307875894988e-06, "loss": 3.0058, "step": 8 }, { "epoch": 0.000322309166114563, "grad_norm": 26.714468002319336, "learning_rate": 2.1479713603818614e-06, "loss": 3.0568, "step": 9 }, { "epoch": 0.00035812129568284776, "grad_norm": 9.890267372131348, "learning_rate": 2.386634844868735e-06, "loss": 2.9411, "step": 10 }, { "epoch": 0.00039393342525113255, "grad_norm": 6.088392734527588, "learning_rate": 2.625298329355609e-06, "loss": 2.6308, "step": 11 }, { "epoch": 0.00042974555481941734, "grad_norm": 5.2657365798950195, "learning_rate": 2.8639618138424826e-06, "loss": 2.3566, "step": 12 }, { "epoch": 0.0004655576843877021, "grad_norm": 14.518162727355957, "learning_rate": 3.1026252983293554e-06, "loss": 3.2695, "step": 13 }, { "epoch": 0.0005013698139559869, "grad_norm": 42.87174987792969, "learning_rate": 3.341288782816229e-06, "loss": 3.3732, "step": 14 }, { "epoch": 0.0005371819435242717, "grad_norm": 5.752267837524414, "learning_rate": 3.579952267303103e-06, "loss": 2.6332, "step": 15 }, { "epoch": 0.0005729940730925565, "grad_norm": 5.9656572341918945, "learning_rate": 3.818615751789976e-06, "loss": 2.5045, "step": 16 }, { "epoch": 0.0006088062026608413, "grad_norm": 15.938884735107422, "learning_rate": 4.05727923627685e-06, "loss": 3.2342, "step": 17 }, { "epoch": 0.000644618332229126, "grad_norm": 12.065786361694336, "learning_rate": 4.295942720763723e-06, "loss": 2.4276, "step": 18 }, { "epoch": 0.0006804304617974107, "grad_norm": 9.022994995117188, "learning_rate": 4.5346062052505965e-06, "loss": 3.5128, "step": 19 }, { "epoch": 0.0007162425913656955, "grad_norm": 8.21558952331543, "learning_rate": 4.77326968973747e-06, "loss": 2.6403, "step": 20 }, { "epoch": 0.0007520547209339803, "grad_norm": 6.674815654754639, "learning_rate": 5.011933174224344e-06, "loss": 2.9362, "step": 21 }, { "epoch": 0.0007878668505022651, "grad_norm": 8.615503311157227, "learning_rate": 5.250596658711218e-06, "loss": 3.4083, "step": 22 }, { "epoch": 0.0008236789800705499, "grad_norm": 5.087034225463867, "learning_rate": 5.489260143198091e-06, "loss": 2.6269, "step": 23 }, { "epoch": 0.0008594911096388347, "grad_norm": 5.468980312347412, "learning_rate": 5.727923627684965e-06, "loss": 2.7802, "step": 24 }, { "epoch": 0.0008953032392071195, "grad_norm": 4.205332279205322, "learning_rate": 5.966587112171838e-06, "loss": 2.6768, "step": 25 }, { "epoch": 0.0009311153687754042, "grad_norm": 2.8609237670898438, "learning_rate": 6.205250596658711e-06, "loss": 2.3405, "step": 26 }, { "epoch": 0.000966927498343689, "grad_norm": 4.893277168273926, "learning_rate": 6.4439140811455855e-06, "loss": 2.9049, "step": 27 }, { "epoch": 0.0010027396279119738, "grad_norm": 2.7744317054748535, "learning_rate": 6.682577565632458e-06, "loss": 2.3621, "step": 28 }, { "epoch": 0.0010385517574802586, "grad_norm": 4.931545257568359, "learning_rate": 6.921241050119331e-06, "loss": 2.8006, "step": 29 }, { "epoch": 0.0010743638870485434, "grad_norm": 2.949612855911255, "learning_rate": 7.159904534606206e-06, "loss": 2.7418, "step": 30 }, { "epoch": 0.0011101760166168282, "grad_norm": 6.12894868850708, "learning_rate": 7.398568019093079e-06, "loss": 3.279, "step": 31 }, { "epoch": 0.001145988146185113, "grad_norm": 4.933887004852295, "learning_rate": 7.637231503579952e-06, "loss": 3.0113, "step": 32 }, { "epoch": 0.0011818002757533978, "grad_norm": 3.533989906311035, "learning_rate": 7.875894988066825e-06, "loss": 2.4149, "step": 33 }, { "epoch": 0.0012176124053216825, "grad_norm": 5.942819118499756, "learning_rate": 8.1145584725537e-06, "loss": 2.7419, "step": 34 }, { "epoch": 0.0012534245348899673, "grad_norm": 5.094061374664307, "learning_rate": 8.353221957040573e-06, "loss": 3.1046, "step": 35 }, { "epoch": 0.001289236664458252, "grad_norm": 4.425570487976074, "learning_rate": 8.591885441527446e-06, "loss": 2.9092, "step": 36 }, { "epoch": 0.0013250487940265367, "grad_norm": 3.693305253982544, "learning_rate": 8.83054892601432e-06, "loss": 2.7984, "step": 37 }, { "epoch": 0.0013608609235948215, "grad_norm": 4.119627475738525, "learning_rate": 9.069212410501193e-06, "loss": 2.4779, "step": 38 }, { "epoch": 0.0013966730531631063, "grad_norm": 4.064864158630371, "learning_rate": 9.307875894988068e-06, "loss": 2.5798, "step": 39 }, { "epoch": 0.001432485182731391, "grad_norm": 6.357635974884033, "learning_rate": 9.54653937947494e-06, "loss": 2.9759, "step": 40 }, { "epoch": 0.0014682973122996758, "grad_norm": 4.885557651519775, "learning_rate": 9.785202863961815e-06, "loss": 2.6862, "step": 41 }, { "epoch": 0.0015041094418679606, "grad_norm": 4.694367408752441, "learning_rate": 1.0023866348448688e-05, "loss": 2.8069, "step": 42 }, { "epoch": 0.0015399215714362454, "grad_norm": 6.279577732086182, "learning_rate": 1.026252983293556e-05, "loss": 2.5923, "step": 43 }, { "epoch": 0.0015757337010045302, "grad_norm": 3.011857032775879, "learning_rate": 1.0501193317422435e-05, "loss": 2.4332, "step": 44 }, { "epoch": 0.001611545830572815, "grad_norm": 5.952685832977295, "learning_rate": 1.0739856801909308e-05, "loss": 3.1646, "step": 45 }, { "epoch": 0.0016473579601410998, "grad_norm": 3.988813877105713, "learning_rate": 1.0978520286396183e-05, "loss": 2.7031, "step": 46 }, { "epoch": 0.0016831700897093846, "grad_norm": 4.060389041900635, "learning_rate": 1.1217183770883056e-05, "loss": 2.7739, "step": 47 }, { "epoch": 0.0017189822192776693, "grad_norm": 3.857228994369507, "learning_rate": 1.145584725536993e-05, "loss": 2.5465, "step": 48 }, { "epoch": 0.0017547943488459541, "grad_norm": 2.791111469268799, "learning_rate": 1.1694510739856803e-05, "loss": 2.465, "step": 49 }, { "epoch": 0.001790606478414239, "grad_norm": 3.5531413555145264, "learning_rate": 1.1933174224343676e-05, "loss": 2.6838, "step": 50 }, { "epoch": 0.0018264186079825237, "grad_norm": 3.4963018894195557, "learning_rate": 1.2171837708830549e-05, "loss": 2.4643, "step": 51 }, { "epoch": 0.0018622307375508085, "grad_norm": 2.362746238708496, "learning_rate": 1.2410501193317422e-05, "loss": 2.2958, "step": 52 }, { "epoch": 0.0018980428671190933, "grad_norm": 2.81494140625, "learning_rate": 1.2649164677804295e-05, "loss": 2.6014, "step": 53 }, { "epoch": 0.001933854996687378, "grad_norm": 2.551485061645508, "learning_rate": 1.2887828162291171e-05, "loss": 2.5691, "step": 54 }, { "epoch": 0.0019696671262556626, "grad_norm": 7.383233547210693, "learning_rate": 1.3126491646778044e-05, "loss": 3.3642, "step": 55 }, { "epoch": 0.0020054792558239476, "grad_norm": 6.4138360023498535, "learning_rate": 1.3365155131264917e-05, "loss": 3.1544, "step": 56 }, { "epoch": 0.002041291385392232, "grad_norm": 3.4359352588653564, "learning_rate": 1.360381861575179e-05, "loss": 2.7802, "step": 57 }, { "epoch": 0.0020771035149605172, "grad_norm": 1.671030879020691, "learning_rate": 1.3842482100238662e-05, "loss": 2.0855, "step": 58 }, { "epoch": 0.002112915644528802, "grad_norm": 2.736416816711426, "learning_rate": 1.4081145584725539e-05, "loss": 2.5001, "step": 59 }, { "epoch": 0.002148727774097087, "grad_norm": 2.362579584121704, "learning_rate": 1.4319809069212412e-05, "loss": 2.5147, "step": 60 }, { "epoch": 0.0021845399036653714, "grad_norm": 4.88362455368042, "learning_rate": 1.4558472553699284e-05, "loss": 2.7335, "step": 61 }, { "epoch": 0.0022203520332336564, "grad_norm": 3.396545171737671, "learning_rate": 1.4797136038186157e-05, "loss": 2.2491, "step": 62 }, { "epoch": 0.002256164162801941, "grad_norm": 2.045161008834839, "learning_rate": 1.5035799522673034e-05, "loss": 2.1031, "step": 63 }, { "epoch": 0.002291976292370226, "grad_norm": 2.4404568672180176, "learning_rate": 1.5274463007159905e-05, "loss": 2.4767, "step": 64 }, { "epoch": 0.0023277884219385105, "grad_norm": 1.991403341293335, "learning_rate": 1.551312649164678e-05, "loss": 2.2476, "step": 65 }, { "epoch": 0.0023636005515067955, "grad_norm": 2.6608493328094482, "learning_rate": 1.575178997613365e-05, "loss": 2.4446, "step": 66 }, { "epoch": 0.00239941268107508, "grad_norm": 4.269506454467773, "learning_rate": 1.5990453460620525e-05, "loss": 2.6558, "step": 67 }, { "epoch": 0.002435224810643365, "grad_norm": 2.757368564605713, "learning_rate": 1.62291169451074e-05, "loss": 2.4896, "step": 68 }, { "epoch": 0.0024710369402116497, "grad_norm": 3.7774527072906494, "learning_rate": 1.6467780429594274e-05, "loss": 2.6252, "step": 69 }, { "epoch": 0.0025068490697799347, "grad_norm": 2.459183692932129, "learning_rate": 1.6706443914081145e-05, "loss": 2.3044, "step": 70 }, { "epoch": 0.0025426611993482192, "grad_norm": 1.71919584274292, "learning_rate": 1.694510739856802e-05, "loss": 2.2353, "step": 71 }, { "epoch": 0.002578473328916504, "grad_norm": 2.3338592052459717, "learning_rate": 1.718377088305489e-05, "loss": 2.3211, "step": 72 }, { "epoch": 0.002614285458484789, "grad_norm": 4.527637958526611, "learning_rate": 1.742243436754177e-05, "loss": 2.5248, "step": 73 }, { "epoch": 0.0026500975880530734, "grad_norm": 4.273586273193359, "learning_rate": 1.766109785202864e-05, "loss": 2.7464, "step": 74 }, { "epoch": 0.0026859097176213584, "grad_norm": 2.748335838317871, "learning_rate": 1.7899761336515515e-05, "loss": 2.4126, "step": 75 }, { "epoch": 0.002721721847189643, "grad_norm": 4.273103713989258, "learning_rate": 1.8138424821002386e-05, "loss": 2.5641, "step": 76 }, { "epoch": 0.002757533976757928, "grad_norm": 2.175398826599121, "learning_rate": 1.837708830548926e-05, "loss": 2.335, "step": 77 }, { "epoch": 0.0027933461063262125, "grad_norm": 1.991686463356018, "learning_rate": 1.8615751789976135e-05, "loss": 2.1709, "step": 78 }, { "epoch": 0.0028291582358944975, "grad_norm": 2.3759572505950928, "learning_rate": 1.885441527446301e-05, "loss": 2.2788, "step": 79 }, { "epoch": 0.002864970365462782, "grad_norm": 2.7693066596984863, "learning_rate": 1.909307875894988e-05, "loss": 2.2775, "step": 80 }, { "epoch": 0.002900782495031067, "grad_norm": 3.2281436920166016, "learning_rate": 1.9331742243436756e-05, "loss": 2.453, "step": 81 }, { "epoch": 0.0029365946245993517, "grad_norm": 2.9001846313476562, "learning_rate": 1.957040572792363e-05, "loss": 2.4301, "step": 82 }, { "epoch": 0.0029724067541676367, "grad_norm": 4.701358795166016, "learning_rate": 1.98090692124105e-05, "loss": 2.3049, "step": 83 }, { "epoch": 0.0030082188837359213, "grad_norm": 3.314978837966919, "learning_rate": 2.0047732696897376e-05, "loss": 2.5192, "step": 84 }, { "epoch": 0.0030440310133042063, "grad_norm": 3.295654535293579, "learning_rate": 2.0286396181384247e-05, "loss": 2.3816, "step": 85 }, { "epoch": 0.003079843142872491, "grad_norm": 4.113006591796875, "learning_rate": 2.052505966587112e-05, "loss": 2.3823, "step": 86 }, { "epoch": 0.003115655272440776, "grad_norm": 6.134260654449463, "learning_rate": 2.0763723150357996e-05, "loss": 2.9283, "step": 87 }, { "epoch": 0.0031514674020090604, "grad_norm": 3.5906167030334473, "learning_rate": 2.100238663484487e-05, "loss": 2.7102, "step": 88 }, { "epoch": 0.0031872795315773454, "grad_norm": 2.333726167678833, "learning_rate": 2.1241050119331742e-05, "loss": 2.0401, "step": 89 }, { "epoch": 0.00322309166114563, "grad_norm": 2.3526418209075928, "learning_rate": 2.1479713603818617e-05, "loss": 2.0289, "step": 90 }, { "epoch": 0.003258903790713915, "grad_norm": 4.23220157623291, "learning_rate": 2.171837708830549e-05, "loss": 2.6101, "step": 91 }, { "epoch": 0.0032947159202821995, "grad_norm": 2.601986885070801, "learning_rate": 2.1957040572792366e-05, "loss": 2.1812, "step": 92 }, { "epoch": 0.0033305280498504846, "grad_norm": 3.328922748565674, "learning_rate": 2.2195704057279237e-05, "loss": 2.4358, "step": 93 }, { "epoch": 0.003366340179418769, "grad_norm": 3.1595191955566406, "learning_rate": 2.243436754176611e-05, "loss": 2.243, "step": 94 }, { "epoch": 0.003402152308987054, "grad_norm": 3.6129677295684814, "learning_rate": 2.2673031026252983e-05, "loss": 1.9055, "step": 95 }, { "epoch": 0.0034379644385553387, "grad_norm": 2.5094430446624756, "learning_rate": 2.291169451073986e-05, "loss": 1.9977, "step": 96 }, { "epoch": 0.0034737765681236233, "grad_norm": 5.377303600311279, "learning_rate": 2.3150357995226732e-05, "loss": 2.5395, "step": 97 }, { "epoch": 0.0035095886976919083, "grad_norm": 5.151118755340576, "learning_rate": 2.3389021479713606e-05, "loss": 2.4771, "step": 98 }, { "epoch": 0.003545400827260193, "grad_norm": 3.074028968811035, "learning_rate": 2.3627684964200477e-05, "loss": 2.1024, "step": 99 }, { "epoch": 0.003581212956828478, "grad_norm": 2.7826621532440186, "learning_rate": 2.3866348448687352e-05, "loss": 2.4725, "step": 100 }, { "epoch": 0.0036170250863967624, "grad_norm": 4.709033012390137, "learning_rate": 2.4105011933174227e-05, "loss": 2.6447, "step": 101 }, { "epoch": 0.0036528372159650474, "grad_norm": 5.2452311515808105, "learning_rate": 2.4343675417661098e-05, "loss": 2.373, "step": 102 }, { "epoch": 0.003688649345533332, "grad_norm": 2.4772093296051025, "learning_rate": 2.4582338902147972e-05, "loss": 2.3534, "step": 103 }, { "epoch": 0.003724461475101617, "grad_norm": 4.724703788757324, "learning_rate": 2.4821002386634844e-05, "loss": 2.6285, "step": 104 }, { "epoch": 0.0037602736046699016, "grad_norm": 2.5867512226104736, "learning_rate": 2.5059665871121718e-05, "loss": 2.2477, "step": 105 }, { "epoch": 0.0037960857342381866, "grad_norm": 4.422124862670898, "learning_rate": 2.529832935560859e-05, "loss": 2.3933, "step": 106 }, { "epoch": 0.003831897863806471, "grad_norm": 3.257288932800293, "learning_rate": 2.5536992840095464e-05, "loss": 1.8866, "step": 107 }, { "epoch": 0.003867709993374756, "grad_norm": 3.285341501235962, "learning_rate": 2.5775656324582342e-05, "loss": 2.3934, "step": 108 }, { "epoch": 0.0039035221229430407, "grad_norm": 3.1677615642547607, "learning_rate": 2.6014319809069216e-05, "loss": 2.4064, "step": 109 }, { "epoch": 0.003939334252511325, "grad_norm": 5.5628461837768555, "learning_rate": 2.6252983293556088e-05, "loss": 2.5679, "step": 110 }, { "epoch": 0.003975146382079611, "grad_norm": 3.9429681301116943, "learning_rate": 2.6491646778042962e-05, "loss": 2.5226, "step": 111 }, { "epoch": 0.004010958511647895, "grad_norm": 3.8189384937286377, "learning_rate": 2.6730310262529833e-05, "loss": 2.3188, "step": 112 }, { "epoch": 0.00404677064121618, "grad_norm": 3.60325026512146, "learning_rate": 2.6968973747016708e-05, "loss": 2.3814, "step": 113 }, { "epoch": 0.004082582770784464, "grad_norm": 4.04654598236084, "learning_rate": 2.720763723150358e-05, "loss": 2.0409, "step": 114 }, { "epoch": 0.00411839490035275, "grad_norm": 2.3440001010894775, "learning_rate": 2.7446300715990454e-05, "loss": 2.1214, "step": 115 }, { "epoch": 0.0041542070299210344, "grad_norm": 2.3456714153289795, "learning_rate": 2.7684964200477325e-05, "loss": 1.7589, "step": 116 }, { "epoch": 0.004190019159489319, "grad_norm": 3.4626646041870117, "learning_rate": 2.7923627684964203e-05, "loss": 2.4336, "step": 117 }, { "epoch": 0.004225831289057604, "grad_norm": 2.838393211364746, "learning_rate": 2.8162291169451077e-05, "loss": 1.9198, "step": 118 }, { "epoch": 0.004261643418625889, "grad_norm": 3.0637154579162598, "learning_rate": 2.840095465393795e-05, "loss": 2.0948, "step": 119 }, { "epoch": 0.004297455548194174, "grad_norm": 3.7256317138671875, "learning_rate": 2.8639618138424823e-05, "loss": 2.4361, "step": 120 }, { "epoch": 0.004333267677762458, "grad_norm": 3.2016754150390625, "learning_rate": 2.8878281622911694e-05, "loss": 2.1261, "step": 121 }, { "epoch": 0.004369079807330743, "grad_norm": 4.712606906890869, "learning_rate": 2.911694510739857e-05, "loss": 2.0613, "step": 122 }, { "epoch": 0.004404891936899027, "grad_norm": 3.5016684532165527, "learning_rate": 2.935560859188544e-05, "loss": 2.1061, "step": 123 }, { "epoch": 0.004440704066467313, "grad_norm": 2.8278017044067383, "learning_rate": 2.9594272076372315e-05, "loss": 2.5463, "step": 124 }, { "epoch": 0.004476516196035597, "grad_norm": 3.4511027336120605, "learning_rate": 2.983293556085919e-05, "loss": 2.4129, "step": 125 }, { "epoch": 0.004512328325603882, "grad_norm": 2.2344186305999756, "learning_rate": 3.0071599045346067e-05, "loss": 2.1861, "step": 126 }, { "epoch": 0.0045481404551721664, "grad_norm": 2.4487826824188232, "learning_rate": 3.031026252983294e-05, "loss": 1.985, "step": 127 }, { "epoch": 0.004583952584740452, "grad_norm": 3.2509713172912598, "learning_rate": 3.054892601431981e-05, "loss": 2.2934, "step": 128 }, { "epoch": 0.0046197647143087365, "grad_norm": 1.7187750339508057, "learning_rate": 3.0787589498806684e-05, "loss": 1.9708, "step": 129 }, { "epoch": 0.004655576843877021, "grad_norm": 5.076966285705566, "learning_rate": 3.102625298329356e-05, "loss": 2.6119, "step": 130 }, { "epoch": 0.004691388973445306, "grad_norm": 3.493896245956421, "learning_rate": 3.126491646778043e-05, "loss": 2.5397, "step": 131 }, { "epoch": 0.004727201103013591, "grad_norm": 2.5739636421203613, "learning_rate": 3.15035799522673e-05, "loss": 2.1216, "step": 132 }, { "epoch": 0.004763013232581876, "grad_norm": 2.577495574951172, "learning_rate": 3.1742243436754176e-05, "loss": 2.1758, "step": 133 }, { "epoch": 0.00479882536215016, "grad_norm": 2.5767970085144043, "learning_rate": 3.198090692124105e-05, "loss": 2.2066, "step": 134 }, { "epoch": 0.004834637491718445, "grad_norm": 3.6485743522644043, "learning_rate": 3.2219570405727925e-05, "loss": 2.4, "step": 135 }, { "epoch": 0.00487044962128673, "grad_norm": 3.2032318115234375, "learning_rate": 3.24582338902148e-05, "loss": 2.323, "step": 136 }, { "epoch": 0.004906261750855015, "grad_norm": 3.5218751430511475, "learning_rate": 3.2696897374701674e-05, "loss": 2.1281, "step": 137 }, { "epoch": 0.004942073880423299, "grad_norm": 3.35833477973938, "learning_rate": 3.293556085918855e-05, "loss": 2.3306, "step": 138 }, { "epoch": 0.004977886009991584, "grad_norm": 2.5601823329925537, "learning_rate": 3.3174224343675416e-05, "loss": 2.0496, "step": 139 }, { "epoch": 0.005013698139559869, "grad_norm": 5.0960373878479, "learning_rate": 3.341288782816229e-05, "loss": 2.6784, "step": 140 }, { "epoch": 0.005049510269128154, "grad_norm": 3.276700973510742, "learning_rate": 3.3651551312649165e-05, "loss": 2.0709, "step": 141 }, { "epoch": 0.0050853223986964385, "grad_norm": 3.034759283065796, "learning_rate": 3.389021479713604e-05, "loss": 2.1616, "step": 142 }, { "epoch": 0.005121134528264723, "grad_norm": 3.5838255882263184, "learning_rate": 3.4128878281622915e-05, "loss": 2.5447, "step": 143 }, { "epoch": 0.005156946657833008, "grad_norm": 3.239966630935669, "learning_rate": 3.436754176610978e-05, "loss": 2.4755, "step": 144 }, { "epoch": 0.005192758787401293, "grad_norm": 3.4092092514038086, "learning_rate": 3.4606205250596664e-05, "loss": 2.2712, "step": 145 }, { "epoch": 0.005228570916969578, "grad_norm": 2.1897785663604736, "learning_rate": 3.484486873508354e-05, "loss": 2.1974, "step": 146 }, { "epoch": 0.005264383046537862, "grad_norm": 3.396059513092041, "learning_rate": 3.5083532219570406e-05, "loss": 2.4299, "step": 147 }, { "epoch": 0.005300195176106147, "grad_norm": 4.3964104652404785, "learning_rate": 3.532219570405728e-05, "loss": 2.2913, "step": 148 }, { "epoch": 0.005336007305674432, "grad_norm": 2.8789312839508057, "learning_rate": 3.5560859188544155e-05, "loss": 1.6942, "step": 149 }, { "epoch": 0.005371819435242717, "grad_norm": 1.7784701585769653, "learning_rate": 3.579952267303103e-05, "loss": 1.9099, "step": 150 }, { "epoch": 0.005407631564811001, "grad_norm": 3.5115861892700195, "learning_rate": 3.60381861575179e-05, "loss": 2.4334, "step": 151 }, { "epoch": 0.005443443694379286, "grad_norm": 2.5266120433807373, "learning_rate": 3.627684964200477e-05, "loss": 2.1036, "step": 152 }, { "epoch": 0.005479255823947571, "grad_norm": 2.6542105674743652, "learning_rate": 3.651551312649165e-05, "loss": 2.1469, "step": 153 }, { "epoch": 0.005515067953515856, "grad_norm": 6.56311559677124, "learning_rate": 3.675417661097852e-05, "loss": 2.9395, "step": 154 }, { "epoch": 0.0055508800830841405, "grad_norm": 2.516366720199585, "learning_rate": 3.6992840095465396e-05, "loss": 1.9398, "step": 155 }, { "epoch": 0.005586692212652425, "grad_norm": 2.376039981842041, "learning_rate": 3.723150357995227e-05, "loss": 2.1975, "step": 156 }, { "epoch": 0.0056225043422207105, "grad_norm": 3.064851760864258, "learning_rate": 3.7470167064439145e-05, "loss": 2.2792, "step": 157 }, { "epoch": 0.005658316471788995, "grad_norm": 3.0628392696380615, "learning_rate": 3.770883054892602e-05, "loss": 2.1432, "step": 158 }, { "epoch": 0.00569412860135728, "grad_norm": 4.342270851135254, "learning_rate": 3.794749403341289e-05, "loss": 2.2726, "step": 159 }, { "epoch": 0.005729940730925564, "grad_norm": 3.9450583457946777, "learning_rate": 3.818615751789976e-05, "loss": 2.2563, "step": 160 }, { "epoch": 0.00576575286049385, "grad_norm": 2.65295672416687, "learning_rate": 3.8424821002386637e-05, "loss": 2.279, "step": 161 }, { "epoch": 0.005801564990062134, "grad_norm": 3.084883451461792, "learning_rate": 3.866348448687351e-05, "loss": 2.347, "step": 162 }, { "epoch": 0.005837377119630419, "grad_norm": 2.0659403800964355, "learning_rate": 3.8902147971360386e-05, "loss": 1.982, "step": 163 }, { "epoch": 0.005873189249198703, "grad_norm": 2.8707988262176514, "learning_rate": 3.914081145584726e-05, "loss": 2.2617, "step": 164 }, { "epoch": 0.005909001378766989, "grad_norm": 2.436180353164673, "learning_rate": 3.9379474940334135e-05, "loss": 2.0672, "step": 165 }, { "epoch": 0.005944813508335273, "grad_norm": 3.461872100830078, "learning_rate": 3.9618138424821e-05, "loss": 2.024, "step": 166 }, { "epoch": 0.005980625637903558, "grad_norm": 2.2728688716888428, "learning_rate": 3.985680190930788e-05, "loss": 1.7771, "step": 167 }, { "epoch": 0.0060164377674718425, "grad_norm": 2.9046618938446045, "learning_rate": 4.009546539379475e-05, "loss": 2.1021, "step": 168 }, { "epoch": 0.006052249897040127, "grad_norm": 3.4231221675872803, "learning_rate": 4.0334128878281626e-05, "loss": 2.4282, "step": 169 }, { "epoch": 0.0060880620266084125, "grad_norm": 2.6314404010772705, "learning_rate": 4.0572792362768494e-05, "loss": 2.5756, "step": 170 }, { "epoch": 0.006123874156176697, "grad_norm": 1.9490439891815186, "learning_rate": 4.081145584725537e-05, "loss": 1.9626, "step": 171 }, { "epoch": 0.006159686285744982, "grad_norm": 2.614915370941162, "learning_rate": 4.105011933174224e-05, "loss": 2.0636, "step": 172 }, { "epoch": 0.006195498415313266, "grad_norm": 2.4821505546569824, "learning_rate": 4.1288782816229125e-05, "loss": 2.2238, "step": 173 }, { "epoch": 0.006231310544881552, "grad_norm": 3.056910991668701, "learning_rate": 4.152744630071599e-05, "loss": 1.9857, "step": 174 }, { "epoch": 0.006267122674449836, "grad_norm": 3.037914514541626, "learning_rate": 4.176610978520287e-05, "loss": 2.0045, "step": 175 }, { "epoch": 0.006302934804018121, "grad_norm": 2.768630266189575, "learning_rate": 4.200477326968974e-05, "loss": 2.2729, "step": 176 }, { "epoch": 0.006338746933586405, "grad_norm": 6.603919506072998, "learning_rate": 4.2243436754176616e-05, "loss": 2.4431, "step": 177 }, { "epoch": 0.006374559063154691, "grad_norm": 2.241738796234131, "learning_rate": 4.2482100238663484e-05, "loss": 2.1826, "step": 178 }, { "epoch": 0.006410371192722975, "grad_norm": 2.1560134887695312, "learning_rate": 4.272076372315036e-05, "loss": 2.041, "step": 179 }, { "epoch": 0.00644618332229126, "grad_norm": 2.806037187576294, "learning_rate": 4.295942720763723e-05, "loss": 2.1842, "step": 180 }, { "epoch": 0.0064819954518595445, "grad_norm": 3.3710579872131348, "learning_rate": 4.319809069212411e-05, "loss": 1.8961, "step": 181 }, { "epoch": 0.00651780758142783, "grad_norm": 2.774545192718506, "learning_rate": 4.343675417661098e-05, "loss": 2.1378, "step": 182 }, { "epoch": 0.0065536197109961145, "grad_norm": 4.045608043670654, "learning_rate": 4.367541766109786e-05, "loss": 2.3613, "step": 183 }, { "epoch": 0.006589431840564399, "grad_norm": 4.307875633239746, "learning_rate": 4.391408114558473e-05, "loss": 2.5546, "step": 184 }, { "epoch": 0.006625243970132684, "grad_norm": 2.564769744873047, "learning_rate": 4.41527446300716e-05, "loss": 2.0365, "step": 185 }, { "epoch": 0.006661056099700969, "grad_norm": 3.592845916748047, "learning_rate": 4.4391408114558474e-05, "loss": 2.0478, "step": 186 }, { "epoch": 0.006696868229269254, "grad_norm": 2.4689202308654785, "learning_rate": 4.463007159904535e-05, "loss": 2.099, "step": 187 }, { "epoch": 0.006732680358837538, "grad_norm": 2.8406245708465576, "learning_rate": 4.486873508353222e-05, "loss": 1.972, "step": 188 }, { "epoch": 0.006768492488405823, "grad_norm": 3.0729517936706543, "learning_rate": 4.510739856801909e-05, "loss": 2.3021, "step": 189 }, { "epoch": 0.006804304617974108, "grad_norm": 2.427234649658203, "learning_rate": 4.5346062052505965e-05, "loss": 2.1842, "step": 190 }, { "epoch": 0.006840116747542393, "grad_norm": 2.7807438373565674, "learning_rate": 4.5584725536992847e-05, "loss": 2.2948, "step": 191 }, { "epoch": 0.006875928877110677, "grad_norm": 3.589139699935913, "learning_rate": 4.582338902147972e-05, "loss": 2.3455, "step": 192 }, { "epoch": 0.006911741006678962, "grad_norm": 3.561444044113159, "learning_rate": 4.606205250596659e-05, "loss": 1.9343, "step": 193 }, { "epoch": 0.0069475531362472465, "grad_norm": 2.3971855640411377, "learning_rate": 4.6300715990453463e-05, "loss": 2.144, "step": 194 }, { "epoch": 0.006983365265815532, "grad_norm": 1.9098275899887085, "learning_rate": 4.653937947494034e-05, "loss": 1.8509, "step": 195 }, { "epoch": 0.0070191773953838165, "grad_norm": 2.244335412979126, "learning_rate": 4.677804295942721e-05, "loss": 1.922, "step": 196 }, { "epoch": 0.007054989524952101, "grad_norm": 2.8799850940704346, "learning_rate": 4.701670644391408e-05, "loss": 1.9339, "step": 197 }, { "epoch": 0.007090801654520386, "grad_norm": 2.483138084411621, "learning_rate": 4.7255369928400955e-05, "loss": 2.0267, "step": 198 }, { "epoch": 0.007126613784088671, "grad_norm": 4.479922294616699, "learning_rate": 4.749403341288783e-05, "loss": 2.345, "step": 199 }, { "epoch": 0.007162425913656956, "grad_norm": 2.4808027744293213, "learning_rate": 4.7732696897374704e-05, "loss": 2.1023, "step": 200 }, { "epoch": 0.00719823804322524, "grad_norm": 3.2008464336395264, "learning_rate": 4.797136038186158e-05, "loss": 2.1962, "step": 201 }, { "epoch": 0.007234050172793525, "grad_norm": 3.3301191329956055, "learning_rate": 4.821002386634845e-05, "loss": 2.0843, "step": 202 }, { "epoch": 0.00726986230236181, "grad_norm": 2.2607581615448, "learning_rate": 4.844868735083533e-05, "loss": 1.812, "step": 203 }, { "epoch": 0.007305674431930095, "grad_norm": 2.308560371398926, "learning_rate": 4.8687350835322196e-05, "loss": 2.1753, "step": 204 }, { "epoch": 0.007341486561498379, "grad_norm": 1.8823864459991455, "learning_rate": 4.892601431980907e-05, "loss": 2.0771, "step": 205 }, { "epoch": 0.007377298691066664, "grad_norm": 2.2979161739349365, "learning_rate": 4.9164677804295945e-05, "loss": 2.1303, "step": 206 }, { "epoch": 0.007413110820634949, "grad_norm": 2.1987791061401367, "learning_rate": 4.940334128878282e-05, "loss": 2.1973, "step": 207 }, { "epoch": 0.007448922950203234, "grad_norm": 2.7911312580108643, "learning_rate": 4.964200477326969e-05, "loss": 2.2152, "step": 208 }, { "epoch": 0.0074847350797715186, "grad_norm": 4.45490837097168, "learning_rate": 4.988066825775656e-05, "loss": 2.0745, "step": 209 }, { "epoch": 0.007520547209339803, "grad_norm": 3.342639684677124, "learning_rate": 5.0119331742243436e-05, "loss": 2.3235, "step": 210 }, { "epoch": 0.0075563593389080886, "grad_norm": 2.095116376876831, "learning_rate": 5.035799522673032e-05, "loss": 1.9779, "step": 211 }, { "epoch": 0.007592171468476373, "grad_norm": 2.7536773681640625, "learning_rate": 5.059665871121718e-05, "loss": 2.11, "step": 212 }, { "epoch": 0.007627983598044658, "grad_norm": 2.3608434200286865, "learning_rate": 5.083532219570406e-05, "loss": 2.0265, "step": 213 }, { "epoch": 0.007663795727612942, "grad_norm": 2.917818307876587, "learning_rate": 5.107398568019093e-05, "loss": 1.9071, "step": 214 }, { "epoch": 0.007699607857181228, "grad_norm": 3.007936716079712, "learning_rate": 5.131264916467781e-05, "loss": 2.0646, "step": 215 }, { "epoch": 0.007735419986749512, "grad_norm": 2.02176833152771, "learning_rate": 5.1551312649164684e-05, "loss": 1.9931, "step": 216 }, { "epoch": 0.007771232116317797, "grad_norm": 2.787811040878296, "learning_rate": 5.178997613365155e-05, "loss": 2.1347, "step": 217 }, { "epoch": 0.007807044245886081, "grad_norm": 3.3416104316711426, "learning_rate": 5.202863961813843e-05, "loss": 2.1722, "step": 218 }, { "epoch": 0.007842856375454366, "grad_norm": 3.348334312438965, "learning_rate": 5.22673031026253e-05, "loss": 2.2807, "step": 219 }, { "epoch": 0.00787866850502265, "grad_norm": 5.854157447814941, "learning_rate": 5.2505966587112175e-05, "loss": 2.7781, "step": 220 }, { "epoch": 0.007914480634590935, "grad_norm": 2.1232118606567383, "learning_rate": 5.274463007159904e-05, "loss": 2.0395, "step": 221 }, { "epoch": 0.007950292764159221, "grad_norm": 3.457007646560669, "learning_rate": 5.2983293556085924e-05, "loss": 2.2375, "step": 222 }, { "epoch": 0.007986104893727506, "grad_norm": 3.5690152645111084, "learning_rate": 5.322195704057279e-05, "loss": 2.5559, "step": 223 }, { "epoch": 0.00802191702329579, "grad_norm": 2.673306941986084, "learning_rate": 5.346062052505967e-05, "loss": 2.2094, "step": 224 }, { "epoch": 0.008057729152864075, "grad_norm": 2.7809441089630127, "learning_rate": 5.369928400954655e-05, "loss": 1.9878, "step": 225 }, { "epoch": 0.00809354128243236, "grad_norm": 3.0256967544555664, "learning_rate": 5.3937947494033416e-05, "loss": 1.8963, "step": 226 }, { "epoch": 0.008129353412000644, "grad_norm": 3.9648005962371826, "learning_rate": 5.417661097852029e-05, "loss": 2.0862, "step": 227 }, { "epoch": 0.008165165541568929, "grad_norm": 2.245849609375, "learning_rate": 5.441527446300716e-05, "loss": 1.7293, "step": 228 }, { "epoch": 0.008200977671137213, "grad_norm": 2.413544178009033, "learning_rate": 5.465393794749404e-05, "loss": 2.0762, "step": 229 }, { "epoch": 0.0082367898007055, "grad_norm": 3.0067107677459717, "learning_rate": 5.489260143198091e-05, "loss": 2.1954, "step": 230 }, { "epoch": 0.008272601930273784, "grad_norm": 3.096357583999634, "learning_rate": 5.513126491646778e-05, "loss": 2.2491, "step": 231 }, { "epoch": 0.008308414059842069, "grad_norm": 2.373617172241211, "learning_rate": 5.536992840095465e-05, "loss": 2.1228, "step": 232 }, { "epoch": 0.008344226189410353, "grad_norm": 2.055159091949463, "learning_rate": 5.560859188544153e-05, "loss": 1.9121, "step": 233 }, { "epoch": 0.008380038318978638, "grad_norm": 3.7004029750823975, "learning_rate": 5.5847255369928406e-05, "loss": 2.3244, "step": 234 }, { "epoch": 0.008415850448546923, "grad_norm": 2.1520016193389893, "learning_rate": 5.6085918854415273e-05, "loss": 2.1335, "step": 235 }, { "epoch": 0.008451662578115207, "grad_norm": 4.440451622009277, "learning_rate": 5.6324582338902155e-05, "loss": 2.6448, "step": 236 }, { "epoch": 0.008487474707683492, "grad_norm": 1.7631467580795288, "learning_rate": 5.656324582338902e-05, "loss": 2.0462, "step": 237 }, { "epoch": 0.008523286837251778, "grad_norm": 3.681589365005493, "learning_rate": 5.68019093078759e-05, "loss": 2.3295, "step": 238 }, { "epoch": 0.008559098966820063, "grad_norm": 2.199773073196411, "learning_rate": 5.7040572792362765e-05, "loss": 1.9294, "step": 239 }, { "epoch": 0.008594911096388347, "grad_norm": 2.9512124061584473, "learning_rate": 5.7279236276849646e-05, "loss": 1.9352, "step": 240 }, { "epoch": 0.008630723225956632, "grad_norm": 2.6109206676483154, "learning_rate": 5.7517899761336514e-05, "loss": 2.1316, "step": 241 }, { "epoch": 0.008666535355524916, "grad_norm": 2.3685696125030518, "learning_rate": 5.775656324582339e-05, "loss": 2.1921, "step": 242 }, { "epoch": 0.008702347485093201, "grad_norm": 3.183547019958496, "learning_rate": 5.799522673031027e-05, "loss": 2.0476, "step": 243 }, { "epoch": 0.008738159614661485, "grad_norm": 2.3643414974212646, "learning_rate": 5.823389021479714e-05, "loss": 2.3041, "step": 244 }, { "epoch": 0.00877397174422977, "grad_norm": 2.646777868270874, "learning_rate": 5.847255369928402e-05, "loss": 1.8089, "step": 245 }, { "epoch": 0.008809783873798055, "grad_norm": 1.8658462762832642, "learning_rate": 5.871121718377088e-05, "loss": 2.2097, "step": 246 }, { "epoch": 0.008845596003366341, "grad_norm": 3.2187111377716064, "learning_rate": 5.894988066825776e-05, "loss": 2.3646, "step": 247 }, { "epoch": 0.008881408132934625, "grad_norm": 2.9397201538085938, "learning_rate": 5.918854415274463e-05, "loss": 1.8528, "step": 248 }, { "epoch": 0.00891722026250291, "grad_norm": 2.3005692958831787, "learning_rate": 5.942720763723151e-05, "loss": 1.6524, "step": 249 }, { "epoch": 0.008953032392071195, "grad_norm": 2.3031492233276367, "learning_rate": 5.966587112171838e-05, "loss": 1.9533, "step": 250 }, { "epoch": 0.00898884452163948, "grad_norm": 1.9477695226669312, "learning_rate": 5.990453460620525e-05, "loss": 1.8504, "step": 251 }, { "epoch": 0.009024656651207764, "grad_norm": 2.6072261333465576, "learning_rate": 6.0143198090692134e-05, "loss": 2.2766, "step": 252 }, { "epoch": 0.009060468780776048, "grad_norm": 1.8509987592697144, "learning_rate": 6.0381861575179e-05, "loss": 1.9699, "step": 253 }, { "epoch": 0.009096280910344333, "grad_norm": 2.4382662773132324, "learning_rate": 6.062052505966588e-05, "loss": 2.0657, "step": 254 }, { "epoch": 0.00913209303991262, "grad_norm": 2.3414926528930664, "learning_rate": 6.0859188544152745e-05, "loss": 2.0522, "step": 255 }, { "epoch": 0.009167905169480904, "grad_norm": 2.583178997039795, "learning_rate": 6.109785202863962e-05, "loss": 2.0446, "step": 256 }, { "epoch": 0.009203717299049188, "grad_norm": 4.623299598693848, "learning_rate": 6.133651551312649e-05, "loss": 2.2111, "step": 257 }, { "epoch": 0.009239529428617473, "grad_norm": 2.9717419147491455, "learning_rate": 6.157517899761337e-05, "loss": 2.0214, "step": 258 }, { "epoch": 0.009275341558185757, "grad_norm": 2.3703761100769043, "learning_rate": 6.181384248210024e-05, "loss": 2.0451, "step": 259 }, { "epoch": 0.009311153687754042, "grad_norm": 2.0671181678771973, "learning_rate": 6.205250596658712e-05, "loss": 1.9399, "step": 260 }, { "epoch": 0.009346965817322327, "grad_norm": 2.3600852489471436, "learning_rate": 6.2291169451074e-05, "loss": 1.9703, "step": 261 }, { "epoch": 0.009382777946890611, "grad_norm": 1.8281196355819702, "learning_rate": 6.252983293556087e-05, "loss": 1.9562, "step": 262 }, { "epoch": 0.009418590076458897, "grad_norm": 3.4217169284820557, "learning_rate": 6.276849642004773e-05, "loss": 2.1296, "step": 263 }, { "epoch": 0.009454402206027182, "grad_norm": 5.549176216125488, "learning_rate": 6.30071599045346e-05, "loss": 2.3743, "step": 264 }, { "epoch": 0.009490214335595467, "grad_norm": 1.707136869430542, "learning_rate": 6.324582338902148e-05, "loss": 1.9212, "step": 265 }, { "epoch": 0.009526026465163751, "grad_norm": 2.4918057918548584, "learning_rate": 6.348448687350835e-05, "loss": 2.0175, "step": 266 }, { "epoch": 0.009561838594732036, "grad_norm": 2.368617057800293, "learning_rate": 6.372315035799523e-05, "loss": 1.8226, "step": 267 }, { "epoch": 0.00959765072430032, "grad_norm": 2.9346976280212402, "learning_rate": 6.39618138424821e-05, "loss": 2.3414, "step": 268 }, { "epoch": 0.009633462853868605, "grad_norm": 2.7401106357574463, "learning_rate": 6.420047732696898e-05, "loss": 1.8663, "step": 269 }, { "epoch": 0.00966927498343689, "grad_norm": 2.8707168102264404, "learning_rate": 6.443914081145585e-05, "loss": 2.2932, "step": 270 }, { "epoch": 0.009705087113005174, "grad_norm": 2.1876916885375977, "learning_rate": 6.467780429594272e-05, "loss": 1.986, "step": 271 }, { "epoch": 0.00974089924257346, "grad_norm": 2.1263930797576904, "learning_rate": 6.49164677804296e-05, "loss": 1.8861, "step": 272 }, { "epoch": 0.009776711372141745, "grad_norm": 1.8206520080566406, "learning_rate": 6.515513126491647e-05, "loss": 1.8851, "step": 273 }, { "epoch": 0.00981252350171003, "grad_norm": 2.8688876628875732, "learning_rate": 6.539379474940335e-05, "loss": 1.8439, "step": 274 }, { "epoch": 0.009848335631278314, "grad_norm": 2.3246121406555176, "learning_rate": 6.563245823389022e-05, "loss": 2.1826, "step": 275 }, { "epoch": 0.009884147760846599, "grad_norm": 2.6065704822540283, "learning_rate": 6.58711217183771e-05, "loss": 2.229, "step": 276 }, { "epoch": 0.009919959890414883, "grad_norm": 2.4256670475006104, "learning_rate": 6.610978520286396e-05, "loss": 2.0945, "step": 277 }, { "epoch": 0.009955772019983168, "grad_norm": 1.7812882661819458, "learning_rate": 6.634844868735083e-05, "loss": 2.255, "step": 278 }, { "epoch": 0.009991584149551452, "grad_norm": 1.8207054138183594, "learning_rate": 6.65871121718377e-05, "loss": 1.6961, "step": 279 }, { "epoch": 0.010027396279119739, "grad_norm": 1.6386990547180176, "learning_rate": 6.682577565632458e-05, "loss": 1.856, "step": 280 }, { "epoch": 0.010063208408688023, "grad_norm": 3.287531614303589, "learning_rate": 6.706443914081146e-05, "loss": 2.3542, "step": 281 }, { "epoch": 0.010099020538256308, "grad_norm": 2.7156126499176025, "learning_rate": 6.730310262529833e-05, "loss": 1.8437, "step": 282 }, { "epoch": 0.010134832667824592, "grad_norm": 2.3014869689941406, "learning_rate": 6.754176610978521e-05, "loss": 2.447, "step": 283 }, { "epoch": 0.010170644797392877, "grad_norm": 2.7459521293640137, "learning_rate": 6.778042959427208e-05, "loss": 2.2216, "step": 284 }, { "epoch": 0.010206456926961162, "grad_norm": 2.640103816986084, "learning_rate": 6.801909307875896e-05, "loss": 2.0094, "step": 285 }, { "epoch": 0.010242269056529446, "grad_norm": 3.3041248321533203, "learning_rate": 6.825775656324583e-05, "loss": 2.6193, "step": 286 }, { "epoch": 0.01027808118609773, "grad_norm": 2.390202522277832, "learning_rate": 6.84964200477327e-05, "loss": 2.4139, "step": 287 }, { "epoch": 0.010313893315666015, "grad_norm": 2.879079580307007, "learning_rate": 6.873508353221956e-05, "loss": 2.1419, "step": 288 }, { "epoch": 0.010349705445234302, "grad_norm": 5.001767635345459, "learning_rate": 6.897374701670645e-05, "loss": 2.108, "step": 289 }, { "epoch": 0.010385517574802586, "grad_norm": 4.854882717132568, "learning_rate": 6.921241050119333e-05, "loss": 2.4277, "step": 290 }, { "epoch": 0.01042132970437087, "grad_norm": 2.883726119995117, "learning_rate": 6.94510739856802e-05, "loss": 2.0774, "step": 291 }, { "epoch": 0.010457141833939155, "grad_norm": 2.1024317741394043, "learning_rate": 6.968973747016708e-05, "loss": 2.1162, "step": 292 }, { "epoch": 0.01049295396350744, "grad_norm": 2.173642873764038, "learning_rate": 6.992840095465394e-05, "loss": 1.9923, "step": 293 }, { "epoch": 0.010528766093075724, "grad_norm": 2.249166250228882, "learning_rate": 7.016706443914081e-05, "loss": 2.0456, "step": 294 }, { "epoch": 0.010564578222644009, "grad_norm": 2.782399892807007, "learning_rate": 7.040572792362768e-05, "loss": 1.6617, "step": 295 }, { "epoch": 0.010600390352212294, "grad_norm": 2.9986255168914795, "learning_rate": 7.064439140811456e-05, "loss": 2.0264, "step": 296 }, { "epoch": 0.01063620248178058, "grad_norm": 1.7165770530700684, "learning_rate": 7.088305489260143e-05, "loss": 1.9433, "step": 297 }, { "epoch": 0.010672014611348864, "grad_norm": 2.9197936058044434, "learning_rate": 7.112171837708831e-05, "loss": 1.9866, "step": 298 }, { "epoch": 0.010707826740917149, "grad_norm": 3.9878487586975098, "learning_rate": 7.136038186157519e-05, "loss": 2.2649, "step": 299 }, { "epoch": 0.010743638870485434, "grad_norm": 2.6345372200012207, "learning_rate": 7.159904534606206e-05, "loss": 1.8249, "step": 300 }, { "epoch": 0.010779451000053718, "grad_norm": 2.4214723110198975, "learning_rate": 7.183770883054893e-05, "loss": 2.0411, "step": 301 }, { "epoch": 0.010815263129622003, "grad_norm": 3.2533586025238037, "learning_rate": 7.20763723150358e-05, "loss": 2.0109, "step": 302 }, { "epoch": 0.010851075259190287, "grad_norm": 2.417447566986084, "learning_rate": 7.231503579952268e-05, "loss": 1.996, "step": 303 }, { "epoch": 0.010886887388758572, "grad_norm": 3.2113840579986572, "learning_rate": 7.255369928400954e-05, "loss": 2.1155, "step": 304 }, { "epoch": 0.010922699518326858, "grad_norm": 2.6941497325897217, "learning_rate": 7.279236276849643e-05, "loss": 2.1457, "step": 305 }, { "epoch": 0.010958511647895143, "grad_norm": 1.8207515478134155, "learning_rate": 7.30310262529833e-05, "loss": 1.9003, "step": 306 }, { "epoch": 0.010994323777463427, "grad_norm": 3.3273072242736816, "learning_rate": 7.326968973747017e-05, "loss": 2.2891, "step": 307 }, { "epoch": 0.011030135907031712, "grad_norm": 1.6675465106964111, "learning_rate": 7.350835322195704e-05, "loss": 1.9006, "step": 308 }, { "epoch": 0.011065948036599996, "grad_norm": 2.2108771800994873, "learning_rate": 7.374701670644391e-05, "loss": 2.0193, "step": 309 }, { "epoch": 0.011101760166168281, "grad_norm": 1.6703202724456787, "learning_rate": 7.398568019093079e-05, "loss": 1.7437, "step": 310 }, { "epoch": 0.011137572295736566, "grad_norm": 2.188809394836426, "learning_rate": 7.422434367541766e-05, "loss": 1.97, "step": 311 }, { "epoch": 0.01117338442530485, "grad_norm": 2.2568600177764893, "learning_rate": 7.446300715990454e-05, "loss": 2.1866, "step": 312 }, { "epoch": 0.011209196554873135, "grad_norm": 2.189673662185669, "learning_rate": 7.470167064439141e-05, "loss": 1.8975, "step": 313 }, { "epoch": 0.011245008684441421, "grad_norm": 1.8501036167144775, "learning_rate": 7.494033412887829e-05, "loss": 2.0104, "step": 314 }, { "epoch": 0.011280820814009706, "grad_norm": 3.4657652378082275, "learning_rate": 7.517899761336516e-05, "loss": 2.1677, "step": 315 }, { "epoch": 0.01131663294357799, "grad_norm": 1.7842155694961548, "learning_rate": 7.541766109785204e-05, "loss": 2.162, "step": 316 }, { "epoch": 0.011352445073146275, "grad_norm": 2.6685571670532227, "learning_rate": 7.565632458233891e-05, "loss": 2.2024, "step": 317 }, { "epoch": 0.01138825720271456, "grad_norm": 2.2792065143585205, "learning_rate": 7.589498806682577e-05, "loss": 2.0813, "step": 318 }, { "epoch": 0.011424069332282844, "grad_norm": 1.8221955299377441, "learning_rate": 7.613365155131266e-05, "loss": 1.9769, "step": 319 }, { "epoch": 0.011459881461851128, "grad_norm": 4.3000922203063965, "learning_rate": 7.637231503579952e-05, "loss": 2.301, "step": 320 }, { "epoch": 0.011495693591419413, "grad_norm": 1.8403431177139282, "learning_rate": 7.66109785202864e-05, "loss": 1.9934, "step": 321 }, { "epoch": 0.0115315057209877, "grad_norm": 2.3018696308135986, "learning_rate": 7.684964200477327e-05, "loss": 1.9003, "step": 322 }, { "epoch": 0.011567317850555984, "grad_norm": 2.887930393218994, "learning_rate": 7.708830548926015e-05, "loss": 2.5743, "step": 323 }, { "epoch": 0.011603129980124268, "grad_norm": 1.5166923999786377, "learning_rate": 7.732696897374702e-05, "loss": 1.8831, "step": 324 }, { "epoch": 0.011638942109692553, "grad_norm": 2.4193575382232666, "learning_rate": 7.756563245823389e-05, "loss": 2.0361, "step": 325 }, { "epoch": 0.011674754239260838, "grad_norm": 1.8523463010787964, "learning_rate": 7.780429594272077e-05, "loss": 1.9684, "step": 326 }, { "epoch": 0.011710566368829122, "grad_norm": 2.535493850708008, "learning_rate": 7.804295942720764e-05, "loss": 2.1888, "step": 327 }, { "epoch": 0.011746378498397407, "grad_norm": 2.134873390197754, "learning_rate": 7.828162291169452e-05, "loss": 1.826, "step": 328 }, { "epoch": 0.011782190627965691, "grad_norm": 2.6702308654785156, "learning_rate": 7.852028639618139e-05, "loss": 1.9467, "step": 329 }, { "epoch": 0.011818002757533978, "grad_norm": 1.7101609706878662, "learning_rate": 7.875894988066827e-05, "loss": 2.0146, "step": 330 }, { "epoch": 0.011853814887102262, "grad_norm": 1.9963518381118774, "learning_rate": 7.899761336515514e-05, "loss": 1.8542, "step": 331 }, { "epoch": 0.011889627016670547, "grad_norm": 2.23158597946167, "learning_rate": 7.9236276849642e-05, "loss": 1.895, "step": 332 }, { "epoch": 0.011925439146238831, "grad_norm": 2.7184247970581055, "learning_rate": 7.947494033412887e-05, "loss": 2.2446, "step": 333 }, { "epoch": 0.011961251275807116, "grad_norm": 2.315206527709961, "learning_rate": 7.971360381861575e-05, "loss": 2.2946, "step": 334 }, { "epoch": 0.0119970634053754, "grad_norm": 2.8590247631073, "learning_rate": 7.995226730310262e-05, "loss": 2.0968, "step": 335 }, { "epoch": 0.012032875534943685, "grad_norm": 2.333500623703003, "learning_rate": 8.01909307875895e-05, "loss": 2.2555, "step": 336 }, { "epoch": 0.01206868766451197, "grad_norm": 1.6746714115142822, "learning_rate": 8.042959427207638e-05, "loss": 1.9062, "step": 337 }, { "epoch": 0.012104499794080254, "grad_norm": 2.6714026927948, "learning_rate": 8.066825775656325e-05, "loss": 2.5086, "step": 338 }, { "epoch": 0.01214031192364854, "grad_norm": 4.634089469909668, "learning_rate": 8.090692124105012e-05, "loss": 2.0091, "step": 339 }, { "epoch": 0.012176124053216825, "grad_norm": 2.3018248081207275, "learning_rate": 8.114558472553699e-05, "loss": 2.1707, "step": 340 }, { "epoch": 0.01221193618278511, "grad_norm": 2.3117361068725586, "learning_rate": 8.138424821002387e-05, "loss": 2.0692, "step": 341 }, { "epoch": 0.012247748312353394, "grad_norm": 2.316091299057007, "learning_rate": 8.162291169451074e-05, "loss": 2.1788, "step": 342 }, { "epoch": 0.012283560441921679, "grad_norm": 1.7986754179000854, "learning_rate": 8.186157517899762e-05, "loss": 2.0328, "step": 343 }, { "epoch": 0.012319372571489963, "grad_norm": 2.5290677547454834, "learning_rate": 8.210023866348449e-05, "loss": 2.2345, "step": 344 }, { "epoch": 0.012355184701058248, "grad_norm": 1.8213863372802734, "learning_rate": 8.233890214797137e-05, "loss": 1.833, "step": 345 }, { "epoch": 0.012390996830626532, "grad_norm": 2.757683753967285, "learning_rate": 8.257756563245825e-05, "loss": 2.2434, "step": 346 }, { "epoch": 0.012426808960194819, "grad_norm": 2.7653613090515137, "learning_rate": 8.28162291169451e-05, "loss": 1.9301, "step": 347 }, { "epoch": 0.012462621089763103, "grad_norm": 2.544404983520508, "learning_rate": 8.305489260143198e-05, "loss": 2.1534, "step": 348 }, { "epoch": 0.012498433219331388, "grad_norm": 2.3338091373443604, "learning_rate": 8.329355608591885e-05, "loss": 2.1738, "step": 349 }, { "epoch": 0.012534245348899672, "grad_norm": 2.3454766273498535, "learning_rate": 8.353221957040573e-05, "loss": 2.1207, "step": 350 }, { "epoch": 0.012570057478467957, "grad_norm": 2.4842939376831055, "learning_rate": 8.37708830548926e-05, "loss": 1.9311, "step": 351 }, { "epoch": 0.012605869608036242, "grad_norm": 3.1244163513183594, "learning_rate": 8.400954653937948e-05, "loss": 1.8968, "step": 352 }, { "epoch": 0.012641681737604526, "grad_norm": 1.9500224590301514, "learning_rate": 8.424821002386635e-05, "loss": 1.9746, "step": 353 }, { "epoch": 0.01267749386717281, "grad_norm": 2.537527561187744, "learning_rate": 8.448687350835323e-05, "loss": 1.8227, "step": 354 }, { "epoch": 0.012713305996741097, "grad_norm": 1.9497244358062744, "learning_rate": 8.47255369928401e-05, "loss": 1.9673, "step": 355 }, { "epoch": 0.012749118126309382, "grad_norm": 2.9225804805755615, "learning_rate": 8.496420047732697e-05, "loss": 2.253, "step": 356 }, { "epoch": 0.012784930255877666, "grad_norm": 2.1374170780181885, "learning_rate": 8.520286396181385e-05, "loss": 1.8416, "step": 357 }, { "epoch": 0.01282074238544595, "grad_norm": 2.318568468093872, "learning_rate": 8.544152744630072e-05, "loss": 2.1213, "step": 358 }, { "epoch": 0.012856554515014235, "grad_norm": 1.6215802431106567, "learning_rate": 8.56801909307876e-05, "loss": 1.9469, "step": 359 }, { "epoch": 0.01289236664458252, "grad_norm": 2.4686646461486816, "learning_rate": 8.591885441527447e-05, "loss": 2.2494, "step": 360 }, { "epoch": 0.012928178774150804, "grad_norm": 1.9479748010635376, "learning_rate": 8.615751789976135e-05, "loss": 2.0661, "step": 361 }, { "epoch": 0.012963990903719089, "grad_norm": 2.651142120361328, "learning_rate": 8.639618138424822e-05, "loss": 2.2081, "step": 362 }, { "epoch": 0.012999803033287374, "grad_norm": 2.298008441925049, "learning_rate": 8.663484486873508e-05, "loss": 2.0353, "step": 363 }, { "epoch": 0.01303561516285566, "grad_norm": 2.419844388961792, "learning_rate": 8.687350835322196e-05, "loss": 1.8743, "step": 364 }, { "epoch": 0.013071427292423944, "grad_norm": 1.65255868434906, "learning_rate": 8.711217183770883e-05, "loss": 1.9959, "step": 365 }, { "epoch": 0.013107239421992229, "grad_norm": 2.8441193103790283, "learning_rate": 8.735083532219571e-05, "loss": 1.9528, "step": 366 }, { "epoch": 0.013143051551560514, "grad_norm": 2.9021151065826416, "learning_rate": 8.758949880668258e-05, "loss": 1.7156, "step": 367 }, { "epoch": 0.013178863681128798, "grad_norm": 2.2067577838897705, "learning_rate": 8.782816229116946e-05, "loss": 1.9428, "step": 368 }, { "epoch": 0.013214675810697083, "grad_norm": 1.5661609172821045, "learning_rate": 8.806682577565633e-05, "loss": 1.7973, "step": 369 }, { "epoch": 0.013250487940265367, "grad_norm": 2.014951467514038, "learning_rate": 8.83054892601432e-05, "loss": 2.0513, "step": 370 }, { "epoch": 0.013286300069833652, "grad_norm": 2.001088857650757, "learning_rate": 8.854415274463007e-05, "loss": 2.0834, "step": 371 }, { "epoch": 0.013322112199401938, "grad_norm": 3.38238787651062, "learning_rate": 8.878281622911695e-05, "loss": 1.9486, "step": 372 }, { "epoch": 0.013357924328970223, "grad_norm": 2.687730550765991, "learning_rate": 8.902147971360383e-05, "loss": 2.0036, "step": 373 }, { "epoch": 0.013393736458538507, "grad_norm": 2.138909101486206, "learning_rate": 8.92601431980907e-05, "loss": 1.8376, "step": 374 }, { "epoch": 0.013429548588106792, "grad_norm": 2.748356580734253, "learning_rate": 8.949880668257758e-05, "loss": 2.4684, "step": 375 }, { "epoch": 0.013465360717675076, "grad_norm": 1.895658016204834, "learning_rate": 8.973747016706445e-05, "loss": 2.1803, "step": 376 }, { "epoch": 0.013501172847243361, "grad_norm": 2.7984509468078613, "learning_rate": 8.997613365155131e-05, "loss": 2.166, "step": 377 }, { "epoch": 0.013536984976811646, "grad_norm": 4.1571173667907715, "learning_rate": 9.021479713603818e-05, "loss": 2.1634, "step": 378 }, { "epoch": 0.01357279710637993, "grad_norm": 2.012040853500366, "learning_rate": 9.045346062052506e-05, "loss": 1.6297, "step": 379 }, { "epoch": 0.013608609235948217, "grad_norm": 1.9994226694107056, "learning_rate": 9.069212410501193e-05, "loss": 1.7911, "step": 380 }, { "epoch": 0.013644421365516501, "grad_norm": 2.6208250522613525, "learning_rate": 9.093078758949881e-05, "loss": 1.94, "step": 381 }, { "epoch": 0.013680233495084786, "grad_norm": 3.200495719909668, "learning_rate": 9.116945107398569e-05, "loss": 1.9532, "step": 382 }, { "epoch": 0.01371604562465307, "grad_norm": 1.4798295497894287, "learning_rate": 9.140811455847256e-05, "loss": 2.0838, "step": 383 }, { "epoch": 0.013751857754221355, "grad_norm": 2.466656446456909, "learning_rate": 9.164677804295944e-05, "loss": 2.0837, "step": 384 }, { "epoch": 0.01378766988378964, "grad_norm": 2.8002874851226807, "learning_rate": 9.18854415274463e-05, "loss": 2.3469, "step": 385 }, { "epoch": 0.013823482013357924, "grad_norm": 2.267125368118286, "learning_rate": 9.212410501193318e-05, "loss": 2.1056, "step": 386 }, { "epoch": 0.013859294142926208, "grad_norm": 2.0536201000213623, "learning_rate": 9.236276849642005e-05, "loss": 2.2548, "step": 387 }, { "epoch": 0.013895106272494493, "grad_norm": 1.349810242652893, "learning_rate": 9.260143198090693e-05, "loss": 2.0713, "step": 388 }, { "epoch": 0.01393091840206278, "grad_norm": 2.174856424331665, "learning_rate": 9.28400954653938e-05, "loss": 1.9262, "step": 389 }, { "epoch": 0.013966730531631064, "grad_norm": 1.979978322982788, "learning_rate": 9.307875894988068e-05, "loss": 2.2743, "step": 390 }, { "epoch": 0.014002542661199349, "grad_norm": 1.4002437591552734, "learning_rate": 9.331742243436754e-05, "loss": 1.8055, "step": 391 }, { "epoch": 0.014038354790767633, "grad_norm": 1.8472005128860474, "learning_rate": 9.355608591885443e-05, "loss": 2.1736, "step": 392 }, { "epoch": 0.014074166920335918, "grad_norm": 1.950952172279358, "learning_rate": 9.379474940334129e-05, "loss": 2.1521, "step": 393 }, { "epoch": 0.014109979049904202, "grad_norm": 1.622833251953125, "learning_rate": 9.403341288782816e-05, "loss": 1.8347, "step": 394 }, { "epoch": 0.014145791179472487, "grad_norm": 2.020577907562256, "learning_rate": 9.427207637231504e-05, "loss": 2.0858, "step": 395 }, { "epoch": 0.014181603309040771, "grad_norm": 1.5022003650665283, "learning_rate": 9.451073985680191e-05, "loss": 1.8131, "step": 396 }, { "epoch": 0.014217415438609058, "grad_norm": 1.293748140335083, "learning_rate": 9.474940334128879e-05, "loss": 1.7116, "step": 397 }, { "epoch": 0.014253227568177342, "grad_norm": 3.8732552528381348, "learning_rate": 9.498806682577566e-05, "loss": 2.359, "step": 398 }, { "epoch": 0.014289039697745627, "grad_norm": 2.218346118927002, "learning_rate": 9.522673031026254e-05, "loss": 2.3042, "step": 399 }, { "epoch": 0.014324851827313911, "grad_norm": 1.8491095304489136, "learning_rate": 9.546539379474941e-05, "loss": 1.8915, "step": 400 }, { "epoch": 0.014360663956882196, "grad_norm": 2.9908032417297363, "learning_rate": 9.570405727923628e-05, "loss": 2.0991, "step": 401 }, { "epoch": 0.01439647608645048, "grad_norm": 6.480159282684326, "learning_rate": 9.594272076372316e-05, "loss": 2.5134, "step": 402 }, { "epoch": 0.014432288216018765, "grad_norm": 1.737426996231079, "learning_rate": 9.618138424821003e-05, "loss": 1.8967, "step": 403 }, { "epoch": 0.01446810034558705, "grad_norm": 1.8148224353790283, "learning_rate": 9.64200477326969e-05, "loss": 1.7998, "step": 404 }, { "epoch": 0.014503912475155336, "grad_norm": 2.05126690864563, "learning_rate": 9.665871121718377e-05, "loss": 1.8313, "step": 405 }, { "epoch": 0.01453972460472362, "grad_norm": 3.2550830841064453, "learning_rate": 9.689737470167066e-05, "loss": 2.2962, "step": 406 }, { "epoch": 0.014575536734291905, "grad_norm": 2.001216411590576, "learning_rate": 9.713603818615752e-05, "loss": 2.031, "step": 407 }, { "epoch": 0.01461134886386019, "grad_norm": 1.8144034147262573, "learning_rate": 9.737470167064439e-05, "loss": 1.675, "step": 408 }, { "epoch": 0.014647160993428474, "grad_norm": 2.0882346630096436, "learning_rate": 9.761336515513126e-05, "loss": 2.0904, "step": 409 }, { "epoch": 0.014682973122996759, "grad_norm": 2.1910388469696045, "learning_rate": 9.785202863961814e-05, "loss": 1.8463, "step": 410 }, { "epoch": 0.014718785252565043, "grad_norm": 1.9278587102890015, "learning_rate": 9.809069212410502e-05, "loss": 2.0166, "step": 411 }, { "epoch": 0.014754597382133328, "grad_norm": 2.418215751647949, "learning_rate": 9.832935560859189e-05, "loss": 2.3441, "step": 412 }, { "epoch": 0.014790409511701613, "grad_norm": 1.1429497003555298, "learning_rate": 9.856801909307877e-05, "loss": 1.628, "step": 413 }, { "epoch": 0.014826221641269899, "grad_norm": 2.355159044265747, "learning_rate": 9.880668257756564e-05, "loss": 2.1252, "step": 414 }, { "epoch": 0.014862033770838183, "grad_norm": 1.5233746767044067, "learning_rate": 9.90453460620525e-05, "loss": 1.9219, "step": 415 }, { "epoch": 0.014897845900406468, "grad_norm": 1.9501924514770508, "learning_rate": 9.928400954653937e-05, "loss": 1.6977, "step": 416 }, { "epoch": 0.014933658029974753, "grad_norm": 2.0013651847839355, "learning_rate": 9.952267303102626e-05, "loss": 2.1085, "step": 417 }, { "epoch": 0.014969470159543037, "grad_norm": 2.1112027168273926, "learning_rate": 9.976133651551312e-05, "loss": 2.0397, "step": 418 }, { "epoch": 0.015005282289111322, "grad_norm": 1.9288190603256226, "learning_rate": 0.0001, "loss": 2.0879, "step": 419 }, { "epoch": 0.015041094418679606, "grad_norm": 2.2626709938049316, "learning_rate": 0.00010023866348448687, "loss": 2.2272, "step": 420 }, { "epoch": 0.01507690654824789, "grad_norm": 1.7174164056777954, "learning_rate": 0.00010047732696897377, "loss": 1.9436, "step": 421 }, { "epoch": 0.015112718677816177, "grad_norm": 2.5441973209381104, "learning_rate": 0.00010071599045346064, "loss": 2.2915, "step": 422 }, { "epoch": 0.015148530807384462, "grad_norm": 1.7958893775939941, "learning_rate": 0.0001009546539379475, "loss": 1.9233, "step": 423 }, { "epoch": 0.015184342936952746, "grad_norm": 2.6941654682159424, "learning_rate": 0.00010119331742243436, "loss": 2.0398, "step": 424 }, { "epoch": 0.01522015506652103, "grad_norm": 2.385948657989502, "learning_rate": 0.00010143198090692125, "loss": 2.2552, "step": 425 }, { "epoch": 0.015255967196089315, "grad_norm": 2.532027244567871, "learning_rate": 0.00010167064439140812, "loss": 2.3462, "step": 426 }, { "epoch": 0.0152917793256576, "grad_norm": 1.7785552740097046, "learning_rate": 0.00010190930787589499, "loss": 2.1202, "step": 427 }, { "epoch": 0.015327591455225885, "grad_norm": 4.266390323638916, "learning_rate": 0.00010214797136038186, "loss": 1.7885, "step": 428 }, { "epoch": 0.015363403584794169, "grad_norm": 1.9129207134246826, "learning_rate": 0.00010238663484486875, "loss": 1.9795, "step": 429 }, { "epoch": 0.015399215714362455, "grad_norm": 2.3568286895751953, "learning_rate": 0.00010262529832935562, "loss": 2.1897, "step": 430 }, { "epoch": 0.01543502784393074, "grad_norm": 2.463308811187744, "learning_rate": 0.00010286396181384249, "loss": 2.191, "step": 431 }, { "epoch": 0.015470839973499025, "grad_norm": 2.999436855316162, "learning_rate": 0.00010310262529832937, "loss": 2.1384, "step": 432 }, { "epoch": 0.01550665210306731, "grad_norm": 2.8959248065948486, "learning_rate": 0.00010334128878281624, "loss": 2.5383, "step": 433 }, { "epoch": 0.015542464232635594, "grad_norm": 1.6721136569976807, "learning_rate": 0.0001035799522673031, "loss": 2.1312, "step": 434 }, { "epoch": 0.015578276362203878, "grad_norm": 2.227095365524292, "learning_rate": 0.00010381861575178997, "loss": 1.9225, "step": 435 }, { "epoch": 0.015614088491772163, "grad_norm": 2.0040481090545654, "learning_rate": 0.00010405727923627687, "loss": 1.9404, "step": 436 }, { "epoch": 0.01564990062134045, "grad_norm": 2.118105173110962, "learning_rate": 0.00010429594272076373, "loss": 2.0456, "step": 437 }, { "epoch": 0.015685712750908732, "grad_norm": 1.629913568496704, "learning_rate": 0.0001045346062052506, "loss": 1.9724, "step": 438 }, { "epoch": 0.01572152488047702, "grad_norm": 1.544683575630188, "learning_rate": 0.00010477326968973748, "loss": 2.3597, "step": 439 }, { "epoch": 0.0157573370100453, "grad_norm": 2.007746934890747, "learning_rate": 0.00010501193317422435, "loss": 2.1682, "step": 440 }, { "epoch": 0.015793149139613587, "grad_norm": 2.2837698459625244, "learning_rate": 0.00010525059665871122, "loss": 2.0777, "step": 441 }, { "epoch": 0.01582896126918187, "grad_norm": 2.146202802658081, "learning_rate": 0.00010548926014319809, "loss": 2.0073, "step": 442 }, { "epoch": 0.015864773398750157, "grad_norm": 2.1583595275878906, "learning_rate": 0.00010572792362768498, "loss": 1.7767, "step": 443 }, { "epoch": 0.015900585528318443, "grad_norm": 2.5789666175842285, "learning_rate": 0.00010596658711217185, "loss": 2.2289, "step": 444 }, { "epoch": 0.015936397657886726, "grad_norm": 2.8515987396240234, "learning_rate": 0.00010620525059665872, "loss": 2.1816, "step": 445 }, { "epoch": 0.015972209787455012, "grad_norm": 2.2290050983428955, "learning_rate": 0.00010644391408114558, "loss": 2.1232, "step": 446 }, { "epoch": 0.016008021917023295, "grad_norm": 1.601799726486206, "learning_rate": 0.00010668257756563247, "loss": 2.1464, "step": 447 }, { "epoch": 0.01604383404659158, "grad_norm": 1.905332088470459, "learning_rate": 0.00010692124105011933, "loss": 1.8095, "step": 448 }, { "epoch": 0.016079646176159864, "grad_norm": 2.2088615894317627, "learning_rate": 0.0001071599045346062, "loss": 2.4517, "step": 449 }, { "epoch": 0.01611545830572815, "grad_norm": 1.835677146911621, "learning_rate": 0.0001073985680190931, "loss": 2.1423, "step": 450 }, { "epoch": 0.016151270435296437, "grad_norm": 1.556067705154419, "learning_rate": 0.00010763723150357996, "loss": 1.8783, "step": 451 }, { "epoch": 0.01618708256486472, "grad_norm": 2.0234811305999756, "learning_rate": 0.00010787589498806683, "loss": 2.0476, "step": 452 }, { "epoch": 0.016222894694433006, "grad_norm": 2.6546096801757812, "learning_rate": 0.0001081145584725537, "loss": 1.9325, "step": 453 }, { "epoch": 0.01625870682400129, "grad_norm": 1.8908900022506714, "learning_rate": 0.00010835322195704058, "loss": 1.9481, "step": 454 }, { "epoch": 0.016294518953569575, "grad_norm": 1.8198904991149902, "learning_rate": 0.00010859188544152745, "loss": 1.918, "step": 455 }, { "epoch": 0.016330331083137858, "grad_norm": 1.573096752166748, "learning_rate": 0.00010883054892601432, "loss": 2.0613, "step": 456 }, { "epoch": 0.016366143212706144, "grad_norm": 1.9454529285430908, "learning_rate": 0.00010906921241050121, "loss": 1.9583, "step": 457 }, { "epoch": 0.016401955342274427, "grad_norm": 1.7274153232574463, "learning_rate": 0.00010930787589498808, "loss": 1.7381, "step": 458 }, { "epoch": 0.016437767471842713, "grad_norm": 1.4126781225204468, "learning_rate": 0.00010954653937947495, "loss": 1.7259, "step": 459 }, { "epoch": 0.016473579601411, "grad_norm": 2.398538589477539, "learning_rate": 0.00010978520286396181, "loss": 2.2239, "step": 460 }, { "epoch": 0.016509391730979282, "grad_norm": 1.7049897909164429, "learning_rate": 0.0001100238663484487, "loss": 1.7488, "step": 461 }, { "epoch": 0.01654520386054757, "grad_norm": 1.9099923372268677, "learning_rate": 0.00011026252983293556, "loss": 1.8568, "step": 462 }, { "epoch": 0.01658101599011585, "grad_norm": 1.5508618354797363, "learning_rate": 0.00011050119331742243, "loss": 1.9427, "step": 463 }, { "epoch": 0.016616828119684138, "grad_norm": 3.0816149711608887, "learning_rate": 0.0001107398568019093, "loss": 2.3965, "step": 464 }, { "epoch": 0.01665264024925242, "grad_norm": 1.8753119707107544, "learning_rate": 0.0001109785202863962, "loss": 1.9468, "step": 465 }, { "epoch": 0.016688452378820707, "grad_norm": 1.9726189374923706, "learning_rate": 0.00011121718377088306, "loss": 1.7688, "step": 466 }, { "epoch": 0.01672426450838899, "grad_norm": 1.5842738151550293, "learning_rate": 0.00011145584725536993, "loss": 1.8443, "step": 467 }, { "epoch": 0.016760076637957276, "grad_norm": 2.3097848892211914, "learning_rate": 0.00011169451073985681, "loss": 2.4292, "step": 468 }, { "epoch": 0.016795888767525562, "grad_norm": 1.7131446599960327, "learning_rate": 0.00011193317422434368, "loss": 1.9989, "step": 469 }, { "epoch": 0.016831700897093845, "grad_norm": 1.544679045677185, "learning_rate": 0.00011217183770883055, "loss": 1.9196, "step": 470 }, { "epoch": 0.01686751302666213, "grad_norm": 2.726846218109131, "learning_rate": 0.00011241050119331741, "loss": 2.1967, "step": 471 }, { "epoch": 0.016903325156230414, "grad_norm": 3.2844207286834717, "learning_rate": 0.00011264916467780431, "loss": 1.8285, "step": 472 }, { "epoch": 0.0169391372857987, "grad_norm": 1.295161247253418, "learning_rate": 0.00011288782816229118, "loss": 1.9555, "step": 473 }, { "epoch": 0.016974949415366983, "grad_norm": 2.582324266433716, "learning_rate": 0.00011312649164677805, "loss": 2.1132, "step": 474 }, { "epoch": 0.01701076154493527, "grad_norm": 1.7749077081680298, "learning_rate": 0.00011336515513126493, "loss": 2.0865, "step": 475 }, { "epoch": 0.017046573674503556, "grad_norm": 2.0523269176483154, "learning_rate": 0.0001136038186157518, "loss": 2.2231, "step": 476 }, { "epoch": 0.01708238580407184, "grad_norm": 3.3681750297546387, "learning_rate": 0.00011384248210023866, "loss": 2.6565, "step": 477 }, { "epoch": 0.017118197933640125, "grad_norm": 2.334472417831421, "learning_rate": 0.00011408114558472553, "loss": 2.1217, "step": 478 }, { "epoch": 0.017154010063208408, "grad_norm": 2.677645444869995, "learning_rate": 0.00011431980906921242, "loss": 2.2404, "step": 479 }, { "epoch": 0.017189822192776694, "grad_norm": 2.0976386070251465, "learning_rate": 0.00011455847255369929, "loss": 2.2931, "step": 480 }, { "epoch": 0.017225634322344977, "grad_norm": 1.753631591796875, "learning_rate": 0.00011479713603818616, "loss": 1.9063, "step": 481 }, { "epoch": 0.017261446451913263, "grad_norm": 1.7365729808807373, "learning_rate": 0.00011503579952267303, "loss": 1.9847, "step": 482 }, { "epoch": 0.017297258581481546, "grad_norm": 2.4236369132995605, "learning_rate": 0.00011527446300715991, "loss": 1.9288, "step": 483 }, { "epoch": 0.017333070711049833, "grad_norm": 1.8561084270477295, "learning_rate": 0.00011551312649164678, "loss": 1.743, "step": 484 }, { "epoch": 0.01736888284061812, "grad_norm": 1.5186161994934082, "learning_rate": 0.00011575178997613365, "loss": 2.0968, "step": 485 }, { "epoch": 0.017404694970186402, "grad_norm": 2.043816089630127, "learning_rate": 0.00011599045346062054, "loss": 2.2149, "step": 486 }, { "epoch": 0.017440507099754688, "grad_norm": 2.2540132999420166, "learning_rate": 0.00011622911694510741, "loss": 2.089, "step": 487 }, { "epoch": 0.01747631922932297, "grad_norm": 1.8196076154708862, "learning_rate": 0.00011646778042959428, "loss": 2.051, "step": 488 }, { "epoch": 0.017512131358891257, "grad_norm": 1.6149146556854248, "learning_rate": 0.00011670644391408114, "loss": 1.991, "step": 489 }, { "epoch": 0.01754794348845954, "grad_norm": 1.9776016473770142, "learning_rate": 0.00011694510739856804, "loss": 2.2675, "step": 490 }, { "epoch": 0.017583755618027826, "grad_norm": 2.040417432785034, "learning_rate": 0.0001171837708830549, "loss": 2.0448, "step": 491 }, { "epoch": 0.01761956774759611, "grad_norm": 1.90510094165802, "learning_rate": 0.00011742243436754176, "loss": 2.0016, "step": 492 }, { "epoch": 0.017655379877164395, "grad_norm": 1.4178807735443115, "learning_rate": 0.00011766109785202863, "loss": 2.1013, "step": 493 }, { "epoch": 0.017691192006732682, "grad_norm": 3.005915641784668, "learning_rate": 0.00011789976133651552, "loss": 2.3571, "step": 494 }, { "epoch": 0.017727004136300965, "grad_norm": 1.7287548780441284, "learning_rate": 0.00011813842482100239, "loss": 1.8362, "step": 495 }, { "epoch": 0.01776281626586925, "grad_norm": 2.2629318237304688, "learning_rate": 0.00011837708830548926, "loss": 2.0679, "step": 496 }, { "epoch": 0.017798628395437534, "grad_norm": 2.3710145950317383, "learning_rate": 0.00011861575178997615, "loss": 2.2409, "step": 497 }, { "epoch": 0.01783444052500582, "grad_norm": 2.7783234119415283, "learning_rate": 0.00011885441527446302, "loss": 2.2683, "step": 498 }, { "epoch": 0.017870252654574103, "grad_norm": 2.343177556991577, "learning_rate": 0.00011909307875894989, "loss": 1.9737, "step": 499 }, { "epoch": 0.01790606478414239, "grad_norm": 1.3416262865066528, "learning_rate": 0.00011933174224343676, "loss": 1.7674, "step": 500 }, { "epoch": 0.017941876913710676, "grad_norm": 2.2444570064544678, "learning_rate": 0.00011957040572792364, "loss": 2.3294, "step": 501 }, { "epoch": 0.01797768904327896, "grad_norm": 2.3115077018737793, "learning_rate": 0.0001198090692124105, "loss": 2.3566, "step": 502 }, { "epoch": 0.018013501172847245, "grad_norm": 1.9268572330474854, "learning_rate": 0.00012004773269689737, "loss": 1.8323, "step": 503 }, { "epoch": 0.018049313302415528, "grad_norm": 3.032489061355591, "learning_rate": 0.00012028639618138427, "loss": 1.9294, "step": 504 }, { "epoch": 0.018085125431983814, "grad_norm": 1.1588308811187744, "learning_rate": 0.00012052505966587114, "loss": 1.9001, "step": 505 }, { "epoch": 0.018120937561552097, "grad_norm": 1.5301387310028076, "learning_rate": 0.000120763723150358, "loss": 1.9317, "step": 506 }, { "epoch": 0.018156749691120383, "grad_norm": 2.175445079803467, "learning_rate": 0.00012100238663484487, "loss": 1.7914, "step": 507 }, { "epoch": 0.018192561820688666, "grad_norm": 1.8067626953125, "learning_rate": 0.00012124105011933175, "loss": 1.9488, "step": 508 }, { "epoch": 0.018228373950256952, "grad_norm": 2.2004895210266113, "learning_rate": 0.00012147971360381862, "loss": 1.9453, "step": 509 }, { "epoch": 0.01826418607982524, "grad_norm": 1.2897497415542603, "learning_rate": 0.00012171837708830549, "loss": 1.8825, "step": 510 }, { "epoch": 0.01829999820939352, "grad_norm": 1.6512136459350586, "learning_rate": 0.00012195704057279236, "loss": 2.1278, "step": 511 }, { "epoch": 0.018335810338961808, "grad_norm": 1.7471429109573364, "learning_rate": 0.00012219570405727924, "loss": 1.994, "step": 512 }, { "epoch": 0.01837162246853009, "grad_norm": 1.75509774684906, "learning_rate": 0.0001224343675417661, "loss": 1.9485, "step": 513 }, { "epoch": 0.018407434598098377, "grad_norm": 1.647789478302002, "learning_rate": 0.00012267303102625297, "loss": 1.8526, "step": 514 }, { "epoch": 0.01844324672766666, "grad_norm": 1.4581859111785889, "learning_rate": 0.00012291169451073987, "loss": 1.9781, "step": 515 }, { "epoch": 0.018479058857234946, "grad_norm": 1.680022120475769, "learning_rate": 0.00012315035799522674, "loss": 1.9252, "step": 516 }, { "epoch": 0.01851487098680323, "grad_norm": 1.8121978044509888, "learning_rate": 0.0001233890214797136, "loss": 1.5773, "step": 517 }, { "epoch": 0.018550683116371515, "grad_norm": 2.650664806365967, "learning_rate": 0.00012362768496420047, "loss": 2.358, "step": 518 }, { "epoch": 0.0185864952459398, "grad_norm": 1.9430338144302368, "learning_rate": 0.00012386634844868737, "loss": 2.1443, "step": 519 }, { "epoch": 0.018622307375508084, "grad_norm": 1.728421688079834, "learning_rate": 0.00012410501193317423, "loss": 1.8808, "step": 520 }, { "epoch": 0.01865811950507637, "grad_norm": 2.1015219688415527, "learning_rate": 0.0001243436754176611, "loss": 1.8069, "step": 521 }, { "epoch": 0.018693931634644653, "grad_norm": 3.219109058380127, "learning_rate": 0.000124582338902148, "loss": 1.9532, "step": 522 }, { "epoch": 0.01872974376421294, "grad_norm": 1.7970722913742065, "learning_rate": 0.00012482100238663487, "loss": 2.1112, "step": 523 }, { "epoch": 0.018765555893781222, "grad_norm": 2.274960517883301, "learning_rate": 0.00012505966587112173, "loss": 1.6838, "step": 524 }, { "epoch": 0.01880136802334951, "grad_norm": 1.3818376064300537, "learning_rate": 0.0001252983293556086, "loss": 1.9166, "step": 525 }, { "epoch": 0.018837180152917795, "grad_norm": 1.6130000352859497, "learning_rate": 0.00012553699284009547, "loss": 2.2296, "step": 526 }, { "epoch": 0.018872992282486078, "grad_norm": 1.9274317026138306, "learning_rate": 0.00012577565632458234, "loss": 2.098, "step": 527 }, { "epoch": 0.018908804412054364, "grad_norm": 1.789971947669983, "learning_rate": 0.0001260143198090692, "loss": 1.7194, "step": 528 }, { "epoch": 0.018944616541622647, "grad_norm": 1.8698501586914062, "learning_rate": 0.00012625298329355607, "loss": 2.3349, "step": 529 }, { "epoch": 0.018980428671190933, "grad_norm": 1.8852440118789673, "learning_rate": 0.00012649164677804297, "loss": 1.918, "step": 530 }, { "epoch": 0.019016240800759216, "grad_norm": 2.258939743041992, "learning_rate": 0.00012673031026252983, "loss": 2.038, "step": 531 }, { "epoch": 0.019052052930327502, "grad_norm": 1.3770029544830322, "learning_rate": 0.0001269689737470167, "loss": 1.8361, "step": 532 }, { "epoch": 0.019087865059895785, "grad_norm": 1.3910750150680542, "learning_rate": 0.0001272076372315036, "loss": 2.0795, "step": 533 }, { "epoch": 0.01912367718946407, "grad_norm": 1.963090181350708, "learning_rate": 0.00012744630071599047, "loss": 2.2053, "step": 534 }, { "epoch": 0.019159489319032358, "grad_norm": 2.1430723667144775, "learning_rate": 0.00012768496420047733, "loss": 1.7194, "step": 535 }, { "epoch": 0.01919530144860064, "grad_norm": 1.6133919954299927, "learning_rate": 0.0001279236276849642, "loss": 1.821, "step": 536 }, { "epoch": 0.019231113578168927, "grad_norm": 1.79860258102417, "learning_rate": 0.0001281622911694511, "loss": 1.9787, "step": 537 }, { "epoch": 0.01926692570773721, "grad_norm": 1.847287893295288, "learning_rate": 0.00012840095465393796, "loss": 2.0806, "step": 538 }, { "epoch": 0.019302737837305496, "grad_norm": 1.458694338798523, "learning_rate": 0.00012863961813842483, "loss": 2.1716, "step": 539 }, { "epoch": 0.01933854996687378, "grad_norm": 2.063096761703491, "learning_rate": 0.0001288782816229117, "loss": 1.9751, "step": 540 }, { "epoch": 0.019374362096442065, "grad_norm": 1.8983663320541382, "learning_rate": 0.00012911694510739857, "loss": 1.9129, "step": 541 }, { "epoch": 0.019410174226010348, "grad_norm": 2.726100444793701, "learning_rate": 0.00012935560859188543, "loss": 2.3521, "step": 542 }, { "epoch": 0.019445986355578634, "grad_norm": 1.60848069190979, "learning_rate": 0.0001295942720763723, "loss": 1.673, "step": 543 }, { "epoch": 0.01948179848514692, "grad_norm": 1.7081117630004883, "learning_rate": 0.0001298329355608592, "loss": 2.2052, "step": 544 }, { "epoch": 0.019517610614715204, "grad_norm": 1.2822455167770386, "learning_rate": 0.00013007159904534607, "loss": 1.8536, "step": 545 }, { "epoch": 0.01955342274428349, "grad_norm": 1.9416059255599976, "learning_rate": 0.00013031026252983293, "loss": 2.3582, "step": 546 }, { "epoch": 0.019589234873851773, "grad_norm": 1.0728063583374023, "learning_rate": 0.0001305489260143198, "loss": 1.6316, "step": 547 }, { "epoch": 0.01962504700342006, "grad_norm": 1.4513641595840454, "learning_rate": 0.0001307875894988067, "loss": 1.9188, "step": 548 }, { "epoch": 0.019660859132988342, "grad_norm": 1.5579917430877686, "learning_rate": 0.00013102625298329356, "loss": 1.758, "step": 549 }, { "epoch": 0.019696671262556628, "grad_norm": 1.304106593132019, "learning_rate": 0.00013126491646778043, "loss": 1.9442, "step": 550 }, { "epoch": 0.01973248339212491, "grad_norm": 1.6284905672073364, "learning_rate": 0.00013150357995226733, "loss": 2.0928, "step": 551 }, { "epoch": 0.019768295521693197, "grad_norm": 2.598228931427002, "learning_rate": 0.0001317422434367542, "loss": 1.9553, "step": 552 }, { "epoch": 0.019804107651261484, "grad_norm": 2.1815645694732666, "learning_rate": 0.00013198090692124106, "loss": 2.2695, "step": 553 }, { "epoch": 0.019839919780829766, "grad_norm": 1.7056655883789062, "learning_rate": 0.00013221957040572793, "loss": 1.9625, "step": 554 }, { "epoch": 0.019875731910398053, "grad_norm": 1.9924041032791138, "learning_rate": 0.00013245823389021482, "loss": 2.1729, "step": 555 }, { "epoch": 0.019911544039966336, "grad_norm": 1.7806648015975952, "learning_rate": 0.00013269689737470167, "loss": 1.845, "step": 556 }, { "epoch": 0.019947356169534622, "grad_norm": 1.9855705499649048, "learning_rate": 0.00013293556085918853, "loss": 1.8858, "step": 557 }, { "epoch": 0.019983168299102905, "grad_norm": 2.107879161834717, "learning_rate": 0.0001331742243436754, "loss": 2.1954, "step": 558 }, { "epoch": 0.02001898042867119, "grad_norm": 1.5310916900634766, "learning_rate": 0.0001334128878281623, "loss": 2.0236, "step": 559 }, { "epoch": 0.020054792558239477, "grad_norm": 1.5715898275375366, "learning_rate": 0.00013365155131264916, "loss": 2.3037, "step": 560 }, { "epoch": 0.02009060468780776, "grad_norm": 1.846575379371643, "learning_rate": 0.00013389021479713603, "loss": 1.9472, "step": 561 }, { "epoch": 0.020126416817376046, "grad_norm": 1.5027644634246826, "learning_rate": 0.00013412887828162293, "loss": 1.8978, "step": 562 }, { "epoch": 0.02016222894694433, "grad_norm": 2.3320515155792236, "learning_rate": 0.0001343675417661098, "loss": 2.0998, "step": 563 }, { "epoch": 0.020198041076512616, "grad_norm": 1.4879544973373413, "learning_rate": 0.00013460620525059666, "loss": 2.0195, "step": 564 }, { "epoch": 0.0202338532060809, "grad_norm": 1.6790188550949097, "learning_rate": 0.00013484486873508353, "loss": 2.0034, "step": 565 }, { "epoch": 0.020269665335649185, "grad_norm": 1.3368083238601685, "learning_rate": 0.00013508353221957042, "loss": 1.853, "step": 566 }, { "epoch": 0.020305477465217468, "grad_norm": 2.40515398979187, "learning_rate": 0.0001353221957040573, "loss": 1.6864, "step": 567 }, { "epoch": 0.020341289594785754, "grad_norm": 3.6255276203155518, "learning_rate": 0.00013556085918854416, "loss": 2.0015, "step": 568 }, { "epoch": 0.02037710172435404, "grad_norm": 1.3990453481674194, "learning_rate": 0.00013579952267303105, "loss": 1.9736, "step": 569 }, { "epoch": 0.020412913853922323, "grad_norm": 1.815877079963684, "learning_rate": 0.00013603818615751792, "loss": 2.0062, "step": 570 }, { "epoch": 0.02044872598349061, "grad_norm": 2.521155595779419, "learning_rate": 0.0001362768496420048, "loss": 2.2658, "step": 571 }, { "epoch": 0.020484538113058892, "grad_norm": 1.6638094186782837, "learning_rate": 0.00013651551312649166, "loss": 2.0926, "step": 572 }, { "epoch": 0.02052035024262718, "grad_norm": 1.9844567775726318, "learning_rate": 0.00013675417661097853, "loss": 2.2214, "step": 573 }, { "epoch": 0.02055616237219546, "grad_norm": 1.8711490631103516, "learning_rate": 0.0001369928400954654, "loss": 1.9504, "step": 574 }, { "epoch": 0.020591974501763748, "grad_norm": 2.035768985748291, "learning_rate": 0.00013723150357995226, "loss": 1.8715, "step": 575 }, { "epoch": 0.02062778663133203, "grad_norm": 1.6506410837173462, "learning_rate": 0.00013747016706443913, "loss": 2.1253, "step": 576 }, { "epoch": 0.020663598760900317, "grad_norm": 1.5059458017349243, "learning_rate": 0.00013770883054892602, "loss": 1.9463, "step": 577 }, { "epoch": 0.020699410890468603, "grad_norm": 2.002347230911255, "learning_rate": 0.0001379474940334129, "loss": 2.3318, "step": 578 }, { "epoch": 0.020735223020036886, "grad_norm": 2.4913315773010254, "learning_rate": 0.00013818615751789976, "loss": 2.0054, "step": 579 }, { "epoch": 0.020771035149605172, "grad_norm": 1.9572597742080688, "learning_rate": 0.00013842482100238665, "loss": 2.24, "step": 580 }, { "epoch": 0.020806847279173455, "grad_norm": 2.6330034732818604, "learning_rate": 0.00013866348448687352, "loss": 2.0444, "step": 581 }, { "epoch": 0.02084265940874174, "grad_norm": 2.075014591217041, "learning_rate": 0.0001389021479713604, "loss": 2.1214, "step": 582 }, { "epoch": 0.020878471538310024, "grad_norm": 1.4259493350982666, "learning_rate": 0.00013914081145584726, "loss": 1.6096, "step": 583 }, { "epoch": 0.02091428366787831, "grad_norm": 2.2150115966796875, "learning_rate": 0.00013937947494033415, "loss": 2.0084, "step": 584 }, { "epoch": 0.020950095797446597, "grad_norm": 2.1223316192626953, "learning_rate": 0.00013961813842482102, "loss": 2.2784, "step": 585 }, { "epoch": 0.02098590792701488, "grad_norm": 1.5994501113891602, "learning_rate": 0.0001398568019093079, "loss": 2.2577, "step": 586 }, { "epoch": 0.021021720056583166, "grad_norm": 1.6532303094863892, "learning_rate": 0.00014009546539379476, "loss": 1.965, "step": 587 }, { "epoch": 0.02105753218615145, "grad_norm": 1.9446479082107544, "learning_rate": 0.00014033412887828162, "loss": 2.207, "step": 588 }, { "epoch": 0.021093344315719735, "grad_norm": 1.3874430656433105, "learning_rate": 0.0001405727923627685, "loss": 2.0884, "step": 589 }, { "epoch": 0.021129156445288018, "grad_norm": 1.4150493144989014, "learning_rate": 0.00014081145584725536, "loss": 1.8492, "step": 590 }, { "epoch": 0.021164968574856304, "grad_norm": 2.022547483444214, "learning_rate": 0.00014105011933174225, "loss": 1.9806, "step": 591 }, { "epoch": 0.021200780704424587, "grad_norm": 2.3493235111236572, "learning_rate": 0.00014128878281622912, "loss": 2.4232, "step": 592 }, { "epoch": 0.021236592833992873, "grad_norm": 1.6374825239181519, "learning_rate": 0.000141527446300716, "loss": 1.9931, "step": 593 }, { "epoch": 0.02127240496356116, "grad_norm": 1.9927897453308105, "learning_rate": 0.00014176610978520286, "loss": 1.9837, "step": 594 }, { "epoch": 0.021308217093129442, "grad_norm": 2.0843703746795654, "learning_rate": 0.00014200477326968975, "loss": 2.2173, "step": 595 }, { "epoch": 0.02134402922269773, "grad_norm": 2.041806936264038, "learning_rate": 0.00014224343675417662, "loss": 2.1302, "step": 596 }, { "epoch": 0.02137984135226601, "grad_norm": 2.7673728466033936, "learning_rate": 0.0001424821002386635, "loss": 2.6909, "step": 597 }, { "epoch": 0.021415653481834298, "grad_norm": 1.646714210510254, "learning_rate": 0.00014272076372315038, "loss": 2.1574, "step": 598 }, { "epoch": 0.02145146561140258, "grad_norm": 1.549854040145874, "learning_rate": 0.00014295942720763725, "loss": 2.1436, "step": 599 }, { "epoch": 0.021487277740970867, "grad_norm": 2.0806381702423096, "learning_rate": 0.00014319809069212412, "loss": 1.6527, "step": 600 }, { "epoch": 0.02152308987053915, "grad_norm": 1.1766951084136963, "learning_rate": 0.000143436754176611, "loss": 1.5967, "step": 601 }, { "epoch": 0.021558902000107436, "grad_norm": 1.7814152240753174, "learning_rate": 0.00014367541766109785, "loss": 1.8676, "step": 602 }, { "epoch": 0.021594714129675723, "grad_norm": 1.4170676469802856, "learning_rate": 0.00014391408114558472, "loss": 1.9861, "step": 603 }, { "epoch": 0.021630526259244005, "grad_norm": 1.3304917812347412, "learning_rate": 0.0001441527446300716, "loss": 1.7847, "step": 604 }, { "epoch": 0.02166633838881229, "grad_norm": 1.353492259979248, "learning_rate": 0.00014439140811455846, "loss": 1.9458, "step": 605 }, { "epoch": 0.021702150518380574, "grad_norm": 1.9017729759216309, "learning_rate": 0.00014463007159904535, "loss": 2.2398, "step": 606 }, { "epoch": 0.02173796264794886, "grad_norm": 1.3059676885604858, "learning_rate": 0.00014486873508353222, "loss": 1.7462, "step": 607 }, { "epoch": 0.021773774777517144, "grad_norm": 1.7438218593597412, "learning_rate": 0.0001451073985680191, "loss": 2.2967, "step": 608 }, { "epoch": 0.02180958690708543, "grad_norm": 1.7737010717391968, "learning_rate": 0.00014534606205250598, "loss": 1.9835, "step": 609 }, { "epoch": 0.021845399036653716, "grad_norm": 1.6507363319396973, "learning_rate": 0.00014558472553699285, "loss": 1.7111, "step": 610 }, { "epoch": 0.021881211166222, "grad_norm": 1.657170295715332, "learning_rate": 0.00014582338902147972, "loss": 2.1349, "step": 611 }, { "epoch": 0.021917023295790285, "grad_norm": 2.128286838531494, "learning_rate": 0.0001460620525059666, "loss": 2.0744, "step": 612 }, { "epoch": 0.021952835425358568, "grad_norm": 1.6936157941818237, "learning_rate": 0.00014630071599045348, "loss": 2.2095, "step": 613 }, { "epoch": 0.021988647554926855, "grad_norm": 1.3525550365447998, "learning_rate": 0.00014653937947494035, "loss": 1.9486, "step": 614 }, { "epoch": 0.022024459684495137, "grad_norm": 1.4725382328033447, "learning_rate": 0.00014677804295942722, "loss": 2.0803, "step": 615 }, { "epoch": 0.022060271814063424, "grad_norm": 1.4054583311080933, "learning_rate": 0.00014701670644391409, "loss": 1.6478, "step": 616 }, { "epoch": 0.022096083943631706, "grad_norm": 1.6811931133270264, "learning_rate": 0.00014725536992840095, "loss": 2.2387, "step": 617 }, { "epoch": 0.022131896073199993, "grad_norm": 2.0554606914520264, "learning_rate": 0.00014749403341288782, "loss": 1.9838, "step": 618 }, { "epoch": 0.02216770820276828, "grad_norm": 1.9794045686721802, "learning_rate": 0.0001477326968973747, "loss": 2.1976, "step": 619 }, { "epoch": 0.022203520332336562, "grad_norm": 1.6070706844329834, "learning_rate": 0.00014797136038186158, "loss": 2.0375, "step": 620 }, { "epoch": 0.022239332461904848, "grad_norm": 1.8594540357589722, "learning_rate": 0.00014821002386634845, "loss": 2.2466, "step": 621 }, { "epoch": 0.02227514459147313, "grad_norm": 2.2988474369049072, "learning_rate": 0.00014844868735083532, "loss": 2.0812, "step": 622 }, { "epoch": 0.022310956721041417, "grad_norm": 1.9472583532333374, "learning_rate": 0.0001486873508353222, "loss": 1.7122, "step": 623 }, { "epoch": 0.0223467688506097, "grad_norm": 2.4133808612823486, "learning_rate": 0.00014892601431980908, "loss": 2.0218, "step": 624 }, { "epoch": 0.022382580980177987, "grad_norm": 1.960550308227539, "learning_rate": 0.00014916467780429595, "loss": 1.6735, "step": 625 }, { "epoch": 0.02241839310974627, "grad_norm": 1.429086446762085, "learning_rate": 0.00014940334128878282, "loss": 1.8953, "step": 626 }, { "epoch": 0.022454205239314556, "grad_norm": 2.099316120147705, "learning_rate": 0.0001496420047732697, "loss": 2.0225, "step": 627 }, { "epoch": 0.022490017368882842, "grad_norm": 1.6147634983062744, "learning_rate": 0.00014988066825775658, "loss": 1.9627, "step": 628 }, { "epoch": 0.022525829498451125, "grad_norm": 3.438903570175171, "learning_rate": 0.00015011933174224345, "loss": 2.2949, "step": 629 }, { "epoch": 0.02256164162801941, "grad_norm": 2.2410268783569336, "learning_rate": 0.00015035799522673032, "loss": 2.1214, "step": 630 }, { "epoch": 0.022597453757587694, "grad_norm": 1.8363879919052124, "learning_rate": 0.0001505966587112172, "loss": 1.6187, "step": 631 }, { "epoch": 0.02263326588715598, "grad_norm": 1.890332579612732, "learning_rate": 0.00015083532219570408, "loss": 2.244, "step": 632 }, { "epoch": 0.022669078016724263, "grad_norm": 2.458477258682251, "learning_rate": 0.00015107398568019092, "loss": 2.4482, "step": 633 }, { "epoch": 0.02270489014629255, "grad_norm": 1.5786428451538086, "learning_rate": 0.00015131264916467781, "loss": 1.9677, "step": 634 }, { "epoch": 0.022740702275860836, "grad_norm": 1.7664666175842285, "learning_rate": 0.00015155131264916468, "loss": 2.1842, "step": 635 }, { "epoch": 0.02277651440542912, "grad_norm": 1.6327040195465088, "learning_rate": 0.00015178997613365155, "loss": 1.7608, "step": 636 }, { "epoch": 0.022812326534997405, "grad_norm": 1.7589260339736938, "learning_rate": 0.00015202863961813842, "loss": 1.9472, "step": 637 }, { "epoch": 0.022848138664565688, "grad_norm": 3.3443808555603027, "learning_rate": 0.0001522673031026253, "loss": 1.894, "step": 638 }, { "epoch": 0.022883950794133974, "grad_norm": 1.7203983068466187, "learning_rate": 0.00015250596658711218, "loss": 1.616, "step": 639 }, { "epoch": 0.022919762923702257, "grad_norm": 2.1859230995178223, "learning_rate": 0.00015274463007159905, "loss": 1.9724, "step": 640 }, { "epoch": 0.022955575053270543, "grad_norm": 1.5211198329925537, "learning_rate": 0.00015298329355608592, "loss": 2.0265, "step": 641 }, { "epoch": 0.022991387182838826, "grad_norm": 1.601146936416626, "learning_rate": 0.0001532219570405728, "loss": 1.9278, "step": 642 }, { "epoch": 0.023027199312407112, "grad_norm": 1.6272515058517456, "learning_rate": 0.00015346062052505968, "loss": 1.8003, "step": 643 }, { "epoch": 0.0230630114419754, "grad_norm": 2.8821325302124023, "learning_rate": 0.00015369928400954655, "loss": 2.173, "step": 644 }, { "epoch": 0.02309882357154368, "grad_norm": 1.6864391565322876, "learning_rate": 0.00015393794749403344, "loss": 2.0027, "step": 645 }, { "epoch": 0.023134635701111968, "grad_norm": 1.6988509893417358, "learning_rate": 0.0001541766109785203, "loss": 2.1044, "step": 646 }, { "epoch": 0.02317044783068025, "grad_norm": 1.5111042261123657, "learning_rate": 0.00015441527446300718, "loss": 1.9445, "step": 647 }, { "epoch": 0.023206259960248537, "grad_norm": 1.5460760593414307, "learning_rate": 0.00015465393794749404, "loss": 1.9057, "step": 648 }, { "epoch": 0.02324207208981682, "grad_norm": 2.1058406829833984, "learning_rate": 0.0001548926014319809, "loss": 2.2557, "step": 649 }, { "epoch": 0.023277884219385106, "grad_norm": 2.062448263168335, "learning_rate": 0.00015513126491646778, "loss": 2.3574, "step": 650 }, { "epoch": 0.02331369634895339, "grad_norm": 1.2844550609588623, "learning_rate": 0.00015536992840095465, "loss": 2.0645, "step": 651 }, { "epoch": 0.023349508478521675, "grad_norm": 1.7018535137176514, "learning_rate": 0.00015560859188544154, "loss": 2.0612, "step": 652 }, { "epoch": 0.02338532060808996, "grad_norm": 2.8740715980529785, "learning_rate": 0.0001558472553699284, "loss": 2.0078, "step": 653 }, { "epoch": 0.023421132737658244, "grad_norm": 2.4455902576446533, "learning_rate": 0.00015608591885441528, "loss": 1.9655, "step": 654 }, { "epoch": 0.02345694486722653, "grad_norm": 1.9727590084075928, "learning_rate": 0.00015632458233890215, "loss": 2.1078, "step": 655 }, { "epoch": 0.023492756996794813, "grad_norm": 1.6687122583389282, "learning_rate": 0.00015656324582338904, "loss": 2.0421, "step": 656 }, { "epoch": 0.0235285691263631, "grad_norm": 2.6429245471954346, "learning_rate": 0.0001568019093078759, "loss": 2.3127, "step": 657 }, { "epoch": 0.023564381255931383, "grad_norm": 2.1367905139923096, "learning_rate": 0.00015704057279236278, "loss": 2.2663, "step": 658 }, { "epoch": 0.02360019338549967, "grad_norm": 1.8748291730880737, "learning_rate": 0.00015727923627684964, "loss": 1.6464, "step": 659 }, { "epoch": 0.023636005515067955, "grad_norm": 1.3424737453460693, "learning_rate": 0.00015751789976133654, "loss": 1.9569, "step": 660 }, { "epoch": 0.023671817644636238, "grad_norm": 1.6000856161117554, "learning_rate": 0.0001577565632458234, "loss": 1.9856, "step": 661 }, { "epoch": 0.023707629774204524, "grad_norm": 1.6837635040283203, "learning_rate": 0.00015799522673031027, "loss": 1.8739, "step": 662 }, { "epoch": 0.023743441903772807, "grad_norm": 1.6797798871994019, "learning_rate": 0.00015823389021479714, "loss": 1.9404, "step": 663 }, { "epoch": 0.023779254033341093, "grad_norm": 1.217158317565918, "learning_rate": 0.000158472553699284, "loss": 2.0562, "step": 664 }, { "epoch": 0.023815066162909376, "grad_norm": 1.4763151407241821, "learning_rate": 0.00015871121718377088, "loss": 1.8707, "step": 665 }, { "epoch": 0.023850878292477663, "grad_norm": 1.5236214399337769, "learning_rate": 0.00015894988066825775, "loss": 1.7224, "step": 666 }, { "epoch": 0.023886690422045945, "grad_norm": 1.8331769704818726, "learning_rate": 0.00015918854415274464, "loss": 2.1492, "step": 667 }, { "epoch": 0.02392250255161423, "grad_norm": 1.3603184223175049, "learning_rate": 0.0001594272076372315, "loss": 2.0236, "step": 668 }, { "epoch": 0.023958314681182518, "grad_norm": 1.4922575950622559, "learning_rate": 0.00015966587112171838, "loss": 1.7288, "step": 669 }, { "epoch": 0.0239941268107508, "grad_norm": 1.5984807014465332, "learning_rate": 0.00015990453460620524, "loss": 2.249, "step": 670 }, { "epoch": 0.024029938940319087, "grad_norm": 1.735472321510315, "learning_rate": 0.00016014319809069214, "loss": 1.7396, "step": 671 }, { "epoch": 0.02406575106988737, "grad_norm": 1.040790319442749, "learning_rate": 0.000160381861575179, "loss": 1.7578, "step": 672 }, { "epoch": 0.024101563199455656, "grad_norm": 1.53122878074646, "learning_rate": 0.00016062052505966587, "loss": 1.7265, "step": 673 }, { "epoch": 0.02413737532902394, "grad_norm": 3.3196752071380615, "learning_rate": 0.00016085918854415277, "loss": 2.035, "step": 674 }, { "epoch": 0.024173187458592225, "grad_norm": 1.471759557723999, "learning_rate": 0.00016109785202863964, "loss": 1.9174, "step": 675 }, { "epoch": 0.02420899958816051, "grad_norm": 1.670170545578003, "learning_rate": 0.0001613365155131265, "loss": 2.0755, "step": 676 }, { "epoch": 0.024244811717728795, "grad_norm": 1.6233552694320679, "learning_rate": 0.00016157517899761337, "loss": 1.8244, "step": 677 }, { "epoch": 0.02428062384729708, "grad_norm": 3.0857901573181152, "learning_rate": 0.00016181384248210024, "loss": 2.8201, "step": 678 }, { "epoch": 0.024316435976865364, "grad_norm": 1.895977258682251, "learning_rate": 0.0001620525059665871, "loss": 1.9378, "step": 679 }, { "epoch": 0.02435224810643365, "grad_norm": 1.9350051879882812, "learning_rate": 0.00016229116945107398, "loss": 2.0559, "step": 680 }, { "epoch": 0.024388060236001933, "grad_norm": 2.0227410793304443, "learning_rate": 0.00016252983293556087, "loss": 2.2404, "step": 681 }, { "epoch": 0.02442387236557022, "grad_norm": 1.1764206886291504, "learning_rate": 0.00016276849642004774, "loss": 1.8332, "step": 682 }, { "epoch": 0.024459684495138502, "grad_norm": 2.8057363033294678, "learning_rate": 0.0001630071599045346, "loss": 2.0951, "step": 683 }, { "epoch": 0.02449549662470679, "grad_norm": 1.5808459520339966, "learning_rate": 0.00016324582338902147, "loss": 2.0856, "step": 684 }, { "epoch": 0.024531308754275075, "grad_norm": 2.126241683959961, "learning_rate": 0.00016348448687350837, "loss": 1.9048, "step": 685 }, { "epoch": 0.024567120883843357, "grad_norm": 1.388526201248169, "learning_rate": 0.00016372315035799524, "loss": 1.8886, "step": 686 }, { "epoch": 0.024602933013411644, "grad_norm": 1.420893907546997, "learning_rate": 0.0001639618138424821, "loss": 1.9233, "step": 687 }, { "epoch": 0.024638745142979927, "grad_norm": 1.5350810289382935, "learning_rate": 0.00016420047732696897, "loss": 2.1028, "step": 688 }, { "epoch": 0.024674557272548213, "grad_norm": 1.220397710800171, "learning_rate": 0.00016443914081145587, "loss": 1.9763, "step": 689 }, { "epoch": 0.024710369402116496, "grad_norm": 1.369965672492981, "learning_rate": 0.00016467780429594274, "loss": 2.074, "step": 690 }, { "epoch": 0.024746181531684782, "grad_norm": 2.692288398742676, "learning_rate": 0.0001649164677804296, "loss": 2.4718, "step": 691 }, { "epoch": 0.024781993661253065, "grad_norm": 1.9050400257110596, "learning_rate": 0.0001651551312649165, "loss": 2.2467, "step": 692 }, { "epoch": 0.02481780579082135, "grad_norm": 1.706419825553894, "learning_rate": 0.00016539379474940334, "loss": 1.7131, "step": 693 }, { "epoch": 0.024853617920389637, "grad_norm": 1.5882086753845215, "learning_rate": 0.0001656324582338902, "loss": 2.0457, "step": 694 }, { "epoch": 0.02488943004995792, "grad_norm": 1.5576844215393066, "learning_rate": 0.00016587112171837707, "loss": 1.7335, "step": 695 }, { "epoch": 0.024925242179526207, "grad_norm": 1.3042941093444824, "learning_rate": 0.00016610978520286397, "loss": 1.7833, "step": 696 }, { "epoch": 0.02496105430909449, "grad_norm": 1.5248041152954102, "learning_rate": 0.00016634844868735084, "loss": 1.8416, "step": 697 }, { "epoch": 0.024996866438662776, "grad_norm": 3.5359699726104736, "learning_rate": 0.0001665871121718377, "loss": 2.8096, "step": 698 }, { "epoch": 0.02503267856823106, "grad_norm": 1.6402137279510498, "learning_rate": 0.0001668257756563246, "loss": 2.0446, "step": 699 }, { "epoch": 0.025068490697799345, "grad_norm": 1.531969428062439, "learning_rate": 0.00016706443914081147, "loss": 1.9463, "step": 700 }, { "epoch": 0.025104302827367628, "grad_norm": 2.4962432384490967, "learning_rate": 0.00016730310262529834, "loss": 2.3453, "step": 701 }, { "epoch": 0.025140114956935914, "grad_norm": 1.790243148803711, "learning_rate": 0.0001675417661097852, "loss": 2.0987, "step": 702 }, { "epoch": 0.0251759270865042, "grad_norm": 1.7493826150894165, "learning_rate": 0.0001677804295942721, "loss": 1.7627, "step": 703 }, { "epoch": 0.025211739216072483, "grad_norm": 1.5792869329452515, "learning_rate": 0.00016801909307875897, "loss": 1.7321, "step": 704 }, { "epoch": 0.02524755134564077, "grad_norm": 1.3130288124084473, "learning_rate": 0.00016825775656324583, "loss": 2.0553, "step": 705 }, { "epoch": 0.025283363475209052, "grad_norm": 1.769005537033081, "learning_rate": 0.0001684964200477327, "loss": 2.0413, "step": 706 }, { "epoch": 0.02531917560477734, "grad_norm": 1.4005001783370972, "learning_rate": 0.0001687350835322196, "loss": 1.8354, "step": 707 }, { "epoch": 0.02535498773434562, "grad_norm": 1.722219705581665, "learning_rate": 0.00016897374701670646, "loss": 2.1277, "step": 708 }, { "epoch": 0.025390799863913908, "grad_norm": 1.9481533765792847, "learning_rate": 0.00016921241050119333, "loss": 2.1439, "step": 709 }, { "epoch": 0.025426611993482194, "grad_norm": 1.281445026397705, "learning_rate": 0.0001694510739856802, "loss": 2.1655, "step": 710 }, { "epoch": 0.025462424123050477, "grad_norm": 1.7158055305480957, "learning_rate": 0.00016968973747016707, "loss": 2.3323, "step": 711 }, { "epoch": 0.025498236252618763, "grad_norm": 1.4600533246994019, "learning_rate": 0.00016992840095465394, "loss": 1.9099, "step": 712 }, { "epoch": 0.025534048382187046, "grad_norm": 1.8186371326446533, "learning_rate": 0.0001701670644391408, "loss": 1.8916, "step": 713 }, { "epoch": 0.025569860511755332, "grad_norm": 1.6856729984283447, "learning_rate": 0.0001704057279236277, "loss": 1.8507, "step": 714 }, { "epoch": 0.025605672641323615, "grad_norm": 1.6164995431900024, "learning_rate": 0.00017064439140811457, "loss": 1.7116, "step": 715 }, { "epoch": 0.0256414847708919, "grad_norm": 1.3906581401824951, "learning_rate": 0.00017088305489260143, "loss": 2.0011, "step": 716 }, { "epoch": 0.025677296900460184, "grad_norm": 1.490162968635559, "learning_rate": 0.00017112171837708833, "loss": 2.2157, "step": 717 }, { "epoch": 0.02571310903002847, "grad_norm": 1.9008619785308838, "learning_rate": 0.0001713603818615752, "loss": 2.0239, "step": 718 }, { "epoch": 0.025748921159596757, "grad_norm": 1.612532377243042, "learning_rate": 0.00017159904534606206, "loss": 2.0756, "step": 719 }, { "epoch": 0.02578473328916504, "grad_norm": 1.4195812940597534, "learning_rate": 0.00017183770883054893, "loss": 2.0478, "step": 720 }, { "epoch": 0.025820545418733326, "grad_norm": 1.613661289215088, "learning_rate": 0.00017207637231503583, "loss": 2.3087, "step": 721 }, { "epoch": 0.02585635754830161, "grad_norm": 1.243811845779419, "learning_rate": 0.0001723150357995227, "loss": 1.9042, "step": 722 }, { "epoch": 0.025892169677869895, "grad_norm": 1.657544493675232, "learning_rate": 0.00017255369928400956, "loss": 1.998, "step": 723 }, { "epoch": 0.025927981807438178, "grad_norm": 2.2602198123931885, "learning_rate": 0.00017279236276849643, "loss": 2.2622, "step": 724 }, { "epoch": 0.025963793937006464, "grad_norm": 1.8787742853164673, "learning_rate": 0.0001730310262529833, "loss": 2.1541, "step": 725 }, { "epoch": 0.025999606066574747, "grad_norm": 1.5164096355438232, "learning_rate": 0.00017326968973747017, "loss": 1.8409, "step": 726 }, { "epoch": 0.026035418196143034, "grad_norm": 1.5882408618927002, "learning_rate": 0.00017350835322195703, "loss": 2.0037, "step": 727 }, { "epoch": 0.02607123032571132, "grad_norm": 2.327857732772827, "learning_rate": 0.00017374701670644393, "loss": 2.1629, "step": 728 }, { "epoch": 0.026107042455279603, "grad_norm": 2.476983070373535, "learning_rate": 0.0001739856801909308, "loss": 2.3573, "step": 729 }, { "epoch": 0.02614285458484789, "grad_norm": 2.473822593688965, "learning_rate": 0.00017422434367541766, "loss": 2.3806, "step": 730 }, { "epoch": 0.026178666714416172, "grad_norm": 2.2253236770629883, "learning_rate": 0.00017446300715990453, "loss": 2.1016, "step": 731 }, { "epoch": 0.026214478843984458, "grad_norm": 2.6160786151885986, "learning_rate": 0.00017470167064439143, "loss": 1.7466, "step": 732 }, { "epoch": 0.02625029097355274, "grad_norm": 1.6734675168991089, "learning_rate": 0.0001749403341288783, "loss": 1.9496, "step": 733 }, { "epoch": 0.026286103103121027, "grad_norm": 2.585047721862793, "learning_rate": 0.00017517899761336516, "loss": 2.2801, "step": 734 }, { "epoch": 0.026321915232689314, "grad_norm": 2.7207157611846924, "learning_rate": 0.00017541766109785203, "loss": 1.9879, "step": 735 }, { "epoch": 0.026357727362257596, "grad_norm": 1.3990424871444702, "learning_rate": 0.00017565632458233893, "loss": 2.206, "step": 736 }, { "epoch": 0.026393539491825883, "grad_norm": 1.7303647994995117, "learning_rate": 0.0001758949880668258, "loss": 2.1254, "step": 737 }, { "epoch": 0.026429351621394166, "grad_norm": 1.4347045421600342, "learning_rate": 0.00017613365155131266, "loss": 2.0242, "step": 738 }, { "epoch": 0.026465163750962452, "grad_norm": 2.1285247802734375, "learning_rate": 0.00017637231503579953, "loss": 1.7194, "step": 739 }, { "epoch": 0.026500975880530735, "grad_norm": 1.445741891860962, "learning_rate": 0.0001766109785202864, "loss": 1.951, "step": 740 }, { "epoch": 0.02653678801009902, "grad_norm": 1.6613547801971436, "learning_rate": 0.00017684964200477326, "loss": 1.9932, "step": 741 }, { "epoch": 0.026572600139667304, "grad_norm": 1.2473163604736328, "learning_rate": 0.00017708830548926013, "loss": 2.0447, "step": 742 }, { "epoch": 0.02660841226923559, "grad_norm": 1.4733924865722656, "learning_rate": 0.00017732696897374703, "loss": 1.9246, "step": 743 }, { "epoch": 0.026644224398803876, "grad_norm": 1.6123089790344238, "learning_rate": 0.0001775656324582339, "loss": 1.7595, "step": 744 }, { "epoch": 0.02668003652837216, "grad_norm": 1.358934998512268, "learning_rate": 0.00017780429594272076, "loss": 1.9185, "step": 745 }, { "epoch": 0.026715848657940446, "grad_norm": 1.765520691871643, "learning_rate": 0.00017804295942720766, "loss": 1.8516, "step": 746 }, { "epoch": 0.02675166078750873, "grad_norm": 2.589219331741333, "learning_rate": 0.00017828162291169453, "loss": 1.9495, "step": 747 }, { "epoch": 0.026787472917077015, "grad_norm": 1.5191903114318848, "learning_rate": 0.0001785202863961814, "loss": 2.121, "step": 748 }, { "epoch": 0.026823285046645298, "grad_norm": 2.5188381671905518, "learning_rate": 0.00017875894988066826, "loss": 2.154, "step": 749 }, { "epoch": 0.026859097176213584, "grad_norm": 1.1737215518951416, "learning_rate": 0.00017899761336515516, "loss": 1.6, "step": 750 }, { "epoch": 0.026894909305781867, "grad_norm": 1.4904850721359253, "learning_rate": 0.00017923627684964202, "loss": 2.1385, "step": 751 }, { "epoch": 0.026930721435350153, "grad_norm": 1.7092859745025635, "learning_rate": 0.0001794749403341289, "loss": 2.4254, "step": 752 }, { "epoch": 0.02696653356491844, "grad_norm": 1.8370356559753418, "learning_rate": 0.00017971360381861576, "loss": 2.0809, "step": 753 }, { "epoch": 0.027002345694486722, "grad_norm": 1.6396968364715576, "learning_rate": 0.00017995226730310263, "loss": 2.0178, "step": 754 }, { "epoch": 0.02703815782405501, "grad_norm": 1.7405651807785034, "learning_rate": 0.0001801909307875895, "loss": 2.1105, "step": 755 }, { "epoch": 0.02707396995362329, "grad_norm": 1.99338698387146, "learning_rate": 0.00018042959427207636, "loss": 2.1981, "step": 756 }, { "epoch": 0.027109782083191578, "grad_norm": 1.7673587799072266, "learning_rate": 0.00018066825775656326, "loss": 2.343, "step": 757 }, { "epoch": 0.02714559421275986, "grad_norm": 2.105565309524536, "learning_rate": 0.00018090692124105013, "loss": 2.0746, "step": 758 }, { "epoch": 0.027181406342328147, "grad_norm": 1.3338245153427124, "learning_rate": 0.000181145584725537, "loss": 1.9739, "step": 759 }, { "epoch": 0.027217218471896433, "grad_norm": 1.8781588077545166, "learning_rate": 0.00018138424821002386, "loss": 2.0575, "step": 760 }, { "epoch": 0.027253030601464716, "grad_norm": 1.1376395225524902, "learning_rate": 0.00018162291169451076, "loss": 1.8376, "step": 761 }, { "epoch": 0.027288842731033002, "grad_norm": 1.6982131004333496, "learning_rate": 0.00018186157517899762, "loss": 2.3264, "step": 762 }, { "epoch": 0.027324654860601285, "grad_norm": 1.391781210899353, "learning_rate": 0.0001821002386634845, "loss": 2.0107, "step": 763 }, { "epoch": 0.02736046699016957, "grad_norm": 1.4247759580612183, "learning_rate": 0.00018233890214797139, "loss": 1.8508, "step": 764 }, { "epoch": 0.027396279119737854, "grad_norm": 1.8408862352371216, "learning_rate": 0.00018257756563245825, "loss": 1.9833, "step": 765 }, { "epoch": 0.02743209124930614, "grad_norm": 2.116222381591797, "learning_rate": 0.00018281622911694512, "loss": 2.0208, "step": 766 }, { "epoch": 0.027467903378874423, "grad_norm": 1.4811813831329346, "learning_rate": 0.000183054892601432, "loss": 1.9656, "step": 767 }, { "epoch": 0.02750371550844271, "grad_norm": 1.1970711946487427, "learning_rate": 0.00018329355608591888, "loss": 2.0476, "step": 768 }, { "epoch": 0.027539527638010996, "grad_norm": 1.6708041429519653, "learning_rate": 0.00018353221957040575, "loss": 2.0781, "step": 769 }, { "epoch": 0.02757533976757928, "grad_norm": 1.3129223585128784, "learning_rate": 0.0001837708830548926, "loss": 1.7613, "step": 770 }, { "epoch": 0.027611151897147565, "grad_norm": 1.6428635120391846, "learning_rate": 0.00018400954653937946, "loss": 1.9753, "step": 771 }, { "epoch": 0.027646964026715848, "grad_norm": 1.3336197137832642, "learning_rate": 0.00018424821002386636, "loss": 1.8005, "step": 772 }, { "epoch": 0.027682776156284134, "grad_norm": 1.9145218133926392, "learning_rate": 0.00018448687350835322, "loss": 1.8981, "step": 773 }, { "epoch": 0.027718588285852417, "grad_norm": 1.456026315689087, "learning_rate": 0.0001847255369928401, "loss": 2.0075, "step": 774 }, { "epoch": 0.027754400415420703, "grad_norm": 1.5138424634933472, "learning_rate": 0.00018496420047732699, "loss": 2.2897, "step": 775 }, { "epoch": 0.027790212544988986, "grad_norm": 1.3635684251785278, "learning_rate": 0.00018520286396181385, "loss": 1.8127, "step": 776 }, { "epoch": 0.027826024674557272, "grad_norm": 2.9247641563415527, "learning_rate": 0.00018544152744630072, "loss": 2.0023, "step": 777 }, { "epoch": 0.02786183680412556, "grad_norm": 2.099508285522461, "learning_rate": 0.0001856801909307876, "loss": 1.8379, "step": 778 }, { "epoch": 0.02789764893369384, "grad_norm": 1.8196276426315308, "learning_rate": 0.00018591885441527448, "loss": 2.3346, "step": 779 }, { "epoch": 0.027933461063262128, "grad_norm": 1.5537874698638916, "learning_rate": 0.00018615751789976135, "loss": 2.0353, "step": 780 }, { "epoch": 0.02796927319283041, "grad_norm": 1.42340087890625, "learning_rate": 0.00018639618138424822, "loss": 1.8038, "step": 781 }, { "epoch": 0.028005085322398697, "grad_norm": 2.276153802871704, "learning_rate": 0.0001866348448687351, "loss": 1.9849, "step": 782 }, { "epoch": 0.02804089745196698, "grad_norm": 1.2486404180526733, "learning_rate": 0.00018687350835322198, "loss": 1.8228, "step": 783 }, { "epoch": 0.028076709581535266, "grad_norm": 2.1660590171813965, "learning_rate": 0.00018711217183770885, "loss": 1.8634, "step": 784 }, { "epoch": 0.028112521711103552, "grad_norm": 1.5528640747070312, "learning_rate": 0.00018735083532219572, "loss": 2.2286, "step": 785 }, { "epoch": 0.028148333840671835, "grad_norm": 1.3248339891433716, "learning_rate": 0.00018758949880668259, "loss": 2.0061, "step": 786 }, { "epoch": 0.02818414597024012, "grad_norm": 1.7929283380508423, "learning_rate": 0.00018782816229116945, "loss": 2.3811, "step": 787 }, { "epoch": 0.028219958099808404, "grad_norm": 1.729906678199768, "learning_rate": 0.00018806682577565632, "loss": 1.8049, "step": 788 }, { "epoch": 0.02825577022937669, "grad_norm": 1.7999067306518555, "learning_rate": 0.0001883054892601432, "loss": 1.9822, "step": 789 }, { "epoch": 0.028291582358944974, "grad_norm": 1.5044533014297485, "learning_rate": 0.00018854415274463008, "loss": 1.8759, "step": 790 }, { "epoch": 0.02832739448851326, "grad_norm": 1.5658330917358398, "learning_rate": 0.00018878281622911695, "loss": 2.1563, "step": 791 }, { "epoch": 0.028363206618081543, "grad_norm": 1.8020349740982056, "learning_rate": 0.00018902147971360382, "loss": 2.1909, "step": 792 }, { "epoch": 0.02839901874764983, "grad_norm": 1.3612864017486572, "learning_rate": 0.00018926014319809071, "loss": 1.7796, "step": 793 }, { "epoch": 0.028434830877218115, "grad_norm": 1.2705191373825073, "learning_rate": 0.00018949880668257758, "loss": 2.071, "step": 794 }, { "epoch": 0.028470643006786398, "grad_norm": 1.3243223428726196, "learning_rate": 0.00018973747016706445, "loss": 2.1743, "step": 795 }, { "epoch": 0.028506455136354684, "grad_norm": 1.805530309677124, "learning_rate": 0.00018997613365155132, "loss": 1.9458, "step": 796 }, { "epoch": 0.028542267265922967, "grad_norm": 1.6725308895111084, "learning_rate": 0.0001902147971360382, "loss": 1.9733, "step": 797 }, { "epoch": 0.028578079395491254, "grad_norm": 2.9171323776245117, "learning_rate": 0.00019045346062052508, "loss": 1.8562, "step": 798 }, { "epoch": 0.028613891525059536, "grad_norm": 1.2938868999481201, "learning_rate": 0.00019069212410501195, "loss": 2.2139, "step": 799 }, { "epoch": 0.028649703654627823, "grad_norm": 1.9000681638717651, "learning_rate": 0.00019093078758949882, "loss": 1.9803, "step": 800 }, { "epoch": 0.028685515784196106, "grad_norm": 1.895714282989502, "learning_rate": 0.00019116945107398568, "loss": 1.8723, "step": 801 }, { "epoch": 0.028721327913764392, "grad_norm": 1.1832196712493896, "learning_rate": 0.00019140811455847255, "loss": 1.7096, "step": 802 }, { "epoch": 0.028757140043332678, "grad_norm": 1.382149577140808, "learning_rate": 0.00019164677804295942, "loss": 1.7541, "step": 803 }, { "epoch": 0.02879295217290096, "grad_norm": 1.5034754276275635, "learning_rate": 0.00019188544152744631, "loss": 2.3756, "step": 804 }, { "epoch": 0.028828764302469247, "grad_norm": 1.5388857126235962, "learning_rate": 0.00019212410501193318, "loss": 1.9153, "step": 805 }, { "epoch": 0.02886457643203753, "grad_norm": 2.118048667907715, "learning_rate": 0.00019236276849642005, "loss": 2.2704, "step": 806 }, { "epoch": 0.028900388561605816, "grad_norm": 1.6065908670425415, "learning_rate": 0.00019260143198090692, "loss": 2.0055, "step": 807 }, { "epoch": 0.0289362006911741, "grad_norm": 2.0480384826660156, "learning_rate": 0.0001928400954653938, "loss": 2.033, "step": 808 }, { "epoch": 0.028972012820742386, "grad_norm": 2.066574811935425, "learning_rate": 0.00019307875894988068, "loss": 2.2037, "step": 809 }, { "epoch": 0.029007824950310672, "grad_norm": 1.3903077840805054, "learning_rate": 0.00019331742243436755, "loss": 1.7733, "step": 810 }, { "epoch": 0.029043637079878955, "grad_norm": 2.8067617416381836, "learning_rate": 0.00019355608591885444, "loss": 1.7627, "step": 811 }, { "epoch": 0.02907944920944724, "grad_norm": 1.9488036632537842, "learning_rate": 0.0001937947494033413, "loss": 1.9536, "step": 812 }, { "epoch": 0.029115261339015524, "grad_norm": 1.4101473093032837, "learning_rate": 0.00019403341288782818, "loss": 2.1673, "step": 813 }, { "epoch": 0.02915107346858381, "grad_norm": 1.2836867570877075, "learning_rate": 0.00019427207637231505, "loss": 1.7867, "step": 814 }, { "epoch": 0.029186885598152093, "grad_norm": 1.4783426523208618, "learning_rate": 0.00019451073985680191, "loss": 2.1039, "step": 815 }, { "epoch": 0.02922269772772038, "grad_norm": 1.3614662885665894, "learning_rate": 0.00019474940334128878, "loss": 2.0084, "step": 816 }, { "epoch": 0.029258509857288662, "grad_norm": 1.2852891683578491, "learning_rate": 0.00019498806682577565, "loss": 1.8364, "step": 817 }, { "epoch": 0.02929432198685695, "grad_norm": 1.6663340330123901, "learning_rate": 0.00019522673031026252, "loss": 2.1946, "step": 818 }, { "epoch": 0.029330134116425235, "grad_norm": 2.088148832321167, "learning_rate": 0.0001954653937947494, "loss": 2.1451, "step": 819 }, { "epoch": 0.029365946245993518, "grad_norm": 1.2781169414520264, "learning_rate": 0.00019570405727923628, "loss": 1.8716, "step": 820 }, { "epoch": 0.029401758375561804, "grad_norm": 1.7055004835128784, "learning_rate": 0.00019594272076372315, "loss": 2.1419, "step": 821 }, { "epoch": 0.029437570505130087, "grad_norm": 2.0636518001556396, "learning_rate": 0.00019618138424821004, "loss": 2.2445, "step": 822 }, { "epoch": 0.029473382634698373, "grad_norm": 1.557982325553894, "learning_rate": 0.0001964200477326969, "loss": 1.8256, "step": 823 }, { "epoch": 0.029509194764266656, "grad_norm": 1.3213343620300293, "learning_rate": 0.00019665871121718378, "loss": 1.6728, "step": 824 }, { "epoch": 0.029545006893834942, "grad_norm": 2.1603245735168457, "learning_rate": 0.00019689737470167065, "loss": 2.0918, "step": 825 }, { "epoch": 0.029580819023403225, "grad_norm": 1.5868972539901733, "learning_rate": 0.00019713603818615754, "loss": 2.0132, "step": 826 }, { "epoch": 0.02961663115297151, "grad_norm": 2.6218371391296387, "learning_rate": 0.0001973747016706444, "loss": 1.97, "step": 827 }, { "epoch": 0.029652443282539798, "grad_norm": 1.8237935304641724, "learning_rate": 0.00019761336515513128, "loss": 1.8931, "step": 828 }, { "epoch": 0.02968825541210808, "grad_norm": 1.9808688163757324, "learning_rate": 0.00019785202863961817, "loss": 1.9166, "step": 829 }, { "epoch": 0.029724067541676367, "grad_norm": 1.4820518493652344, "learning_rate": 0.000198090692124105, "loss": 1.9317, "step": 830 }, { "epoch": 0.02975987967124465, "grad_norm": 2.2204062938690186, "learning_rate": 0.00019832935560859188, "loss": 2.0906, "step": 831 }, { "epoch": 0.029795691800812936, "grad_norm": 1.4470603466033936, "learning_rate": 0.00019856801909307875, "loss": 1.9674, "step": 832 }, { "epoch": 0.02983150393038122, "grad_norm": 1.444513201713562, "learning_rate": 0.00019880668257756564, "loss": 1.9251, "step": 833 }, { "epoch": 0.029867316059949505, "grad_norm": 1.6284515857696533, "learning_rate": 0.0001990453460620525, "loss": 1.8075, "step": 834 }, { "epoch": 0.02990312818951779, "grad_norm": 1.885413646697998, "learning_rate": 0.00019928400954653938, "loss": 2.0856, "step": 835 }, { "epoch": 0.029938940319086074, "grad_norm": 1.7370575666427612, "learning_rate": 0.00019952267303102625, "loss": 1.7621, "step": 836 }, { "epoch": 0.02997475244865436, "grad_norm": 2.0905561447143555, "learning_rate": 0.00019976133651551314, "loss": 2.5441, "step": 837 }, { "epoch": 0.030010564578222643, "grad_norm": 1.5622786283493042, "learning_rate": 0.0002, "loss": 1.9866, "step": 838 }, { "epoch": 0.03004637670779093, "grad_norm": 1.1882922649383545, "learning_rate": 0.0001999999993273145, "loss": 1.807, "step": 839 }, { "epoch": 0.030082188837359213, "grad_norm": 1.3145676851272583, "learning_rate": 0.000199999997309258, "loss": 1.9652, "step": 840 }, { "epoch": 0.0301180009669275, "grad_norm": 1.4804152250289917, "learning_rate": 0.00019999999394583053, "loss": 2.1154, "step": 841 }, { "epoch": 0.03015381309649578, "grad_norm": 1.6515103578567505, "learning_rate": 0.00019999998923703213, "loss": 1.7606, "step": 842 }, { "epoch": 0.030189625226064068, "grad_norm": 2.046954393386841, "learning_rate": 0.00019999998318286286, "loss": 1.9643, "step": 843 }, { "epoch": 0.030225437355632354, "grad_norm": 2.260840654373169, "learning_rate": 0.0001999999757833228, "loss": 2.0192, "step": 844 }, { "epoch": 0.030261249485200637, "grad_norm": 2.859248161315918, "learning_rate": 0.00019999996703841207, "loss": 1.9989, "step": 845 }, { "epoch": 0.030297061614768923, "grad_norm": 1.7310001850128174, "learning_rate": 0.00019999995694813073, "loss": 2.0006, "step": 846 }, { "epoch": 0.030332873744337206, "grad_norm": 1.423134207725525, "learning_rate": 0.00019999994551247901, "loss": 1.7585, "step": 847 }, { "epoch": 0.030368685873905493, "grad_norm": 1.5323225259780884, "learning_rate": 0.000199999932731457, "loss": 2.1375, "step": 848 }, { "epoch": 0.030404498003473775, "grad_norm": 1.53788423538208, "learning_rate": 0.00019999991860506492, "loss": 1.9595, "step": 849 }, { "epoch": 0.03044031013304206, "grad_norm": 1.2339884042739868, "learning_rate": 0.00019999990313330286, "loss": 2.0193, "step": 850 }, { "epoch": 0.030476122262610345, "grad_norm": 1.9879727363586426, "learning_rate": 0.00019999988631617114, "loss": 1.5824, "step": 851 }, { "epoch": 0.03051193439217863, "grad_norm": 1.245160460472107, "learning_rate": 0.00019999986815366993, "loss": 1.7495, "step": 852 }, { "epoch": 0.030547746521746917, "grad_norm": 1.278892159461975, "learning_rate": 0.0001999998486457995, "loss": 2.207, "step": 853 }, { "epoch": 0.0305835586513152, "grad_norm": 1.3570001125335693, "learning_rate": 0.00019999982779256005, "loss": 1.7336, "step": 854 }, { "epoch": 0.030619370780883486, "grad_norm": 1.5163108110427856, "learning_rate": 0.00019999980559395195, "loss": 2.1173, "step": 855 }, { "epoch": 0.03065518291045177, "grad_norm": 1.1349409818649292, "learning_rate": 0.00019999978204997545, "loss": 1.8362, "step": 856 }, { "epoch": 0.030690995040020055, "grad_norm": 1.9292572736740112, "learning_rate": 0.00019999975716063087, "loss": 2.0006, "step": 857 }, { "epoch": 0.030726807169588338, "grad_norm": 1.8030412197113037, "learning_rate": 0.0001999997309259185, "loss": 2.0975, "step": 858 }, { "epoch": 0.030762619299156625, "grad_norm": 1.3017148971557617, "learning_rate": 0.0001999997033458388, "loss": 1.9676, "step": 859 }, { "epoch": 0.03079843142872491, "grad_norm": 1.4081376791000366, "learning_rate": 0.00019999967442039206, "loss": 1.985, "step": 860 }, { "epoch": 0.030834243558293194, "grad_norm": 1.465991735458374, "learning_rate": 0.0001999996441495787, "loss": 1.7478, "step": 861 }, { "epoch": 0.03087005568786148, "grad_norm": 1.6048805713653564, "learning_rate": 0.0001999996125333991, "loss": 2.0733, "step": 862 }, { "epoch": 0.030905867817429763, "grad_norm": 1.3127105236053467, "learning_rate": 0.00019999957957185375, "loss": 1.9589, "step": 863 }, { "epoch": 0.03094167994699805, "grad_norm": 1.815430760383606, "learning_rate": 0.000199999545264943, "loss": 2.246, "step": 864 }, { "epoch": 0.030977492076566332, "grad_norm": 1.334842324256897, "learning_rate": 0.00019999950961266738, "loss": 1.8453, "step": 865 }, { "epoch": 0.03101330420613462, "grad_norm": 1.6176645755767822, "learning_rate": 0.00019999947261502735, "loss": 2.0127, "step": 866 }, { "epoch": 0.0310491163357029, "grad_norm": 1.444711446762085, "learning_rate": 0.0001999994342720234, "loss": 1.7625, "step": 867 }, { "epoch": 0.031084928465271187, "grad_norm": 1.4336127042770386, "learning_rate": 0.00019999939458365605, "loss": 1.799, "step": 868 }, { "epoch": 0.031120740594839474, "grad_norm": 1.5195722579956055, "learning_rate": 0.00019999935354992582, "loss": 1.9003, "step": 869 }, { "epoch": 0.031156552724407757, "grad_norm": 2.2031455039978027, "learning_rate": 0.0001999993111708333, "loss": 2.1488, "step": 870 }, { "epoch": 0.031192364853976043, "grad_norm": 1.6597728729248047, "learning_rate": 0.00019999926744637903, "loss": 1.9993, "step": 871 }, { "epoch": 0.031228176983544326, "grad_norm": 1.707897424697876, "learning_rate": 0.0001999992223765636, "loss": 2.0831, "step": 872 }, { "epoch": 0.03126398911311261, "grad_norm": 1.9093265533447266, "learning_rate": 0.00019999917596138765, "loss": 2.2365, "step": 873 }, { "epoch": 0.0312998012426809, "grad_norm": 2.5550174713134766, "learning_rate": 0.00019999912820085176, "loss": 2.1307, "step": 874 }, { "epoch": 0.03133561337224918, "grad_norm": 1.5057379007339478, "learning_rate": 0.0001999990790949566, "loss": 2.0101, "step": 875 }, { "epoch": 0.031371425501817464, "grad_norm": 1.5937858819961548, "learning_rate": 0.0001999990286437028, "loss": 2.0092, "step": 876 }, { "epoch": 0.031407237631385754, "grad_norm": 1.1650123596191406, "learning_rate": 0.00019999897684709104, "loss": 1.8871, "step": 877 }, { "epoch": 0.03144304976095404, "grad_norm": 2.418663501739502, "learning_rate": 0.00019999892370512208, "loss": 2.106, "step": 878 }, { "epoch": 0.03147886189052232, "grad_norm": 1.407043218612671, "learning_rate": 0.00019999886921779657, "loss": 1.9746, "step": 879 }, { "epoch": 0.0315146740200906, "grad_norm": 1.4505114555358887, "learning_rate": 0.00019999881338511526, "loss": 1.9175, "step": 880 }, { "epoch": 0.03155048614965889, "grad_norm": 1.4700239896774292, "learning_rate": 0.0001999987562070789, "loss": 1.9464, "step": 881 }, { "epoch": 0.031586298279227175, "grad_norm": 1.1745959520339966, "learning_rate": 0.00019999869768368828, "loss": 1.7727, "step": 882 }, { "epoch": 0.03162211040879546, "grad_norm": 1.303771734237671, "learning_rate": 0.0001999986378149442, "loss": 2.056, "step": 883 }, { "epoch": 0.03165792253836374, "grad_norm": 1.8219273090362549, "learning_rate": 0.00019999857660084737, "loss": 1.768, "step": 884 }, { "epoch": 0.03169373466793203, "grad_norm": 1.5007424354553223, "learning_rate": 0.00019999851404139873, "loss": 1.9576, "step": 885 }, { "epoch": 0.03172954679750031, "grad_norm": 1.8747928142547607, "learning_rate": 0.00019999845013659906, "loss": 1.8605, "step": 886 }, { "epoch": 0.031765358927068596, "grad_norm": 1.2296186685562134, "learning_rate": 0.00019999838488644924, "loss": 2.0188, "step": 887 }, { "epoch": 0.031801171056636886, "grad_norm": 1.2861896753311157, "learning_rate": 0.00019999831829095013, "loss": 2.0889, "step": 888 }, { "epoch": 0.03183698318620517, "grad_norm": 1.5891250371932983, "learning_rate": 0.00019999825035010263, "loss": 1.9346, "step": 889 }, { "epoch": 0.03187279531577345, "grad_norm": 1.4180065393447876, "learning_rate": 0.00019999818106390766, "loss": 1.7997, "step": 890 }, { "epoch": 0.031908607445341734, "grad_norm": 1.7038114070892334, "learning_rate": 0.0001999981104323662, "loss": 1.7128, "step": 891 }, { "epoch": 0.031944419574910024, "grad_norm": 1.518619179725647, "learning_rate": 0.00019999803845547907, "loss": 1.802, "step": 892 }, { "epoch": 0.03198023170447831, "grad_norm": 1.3319896459579468, "learning_rate": 0.00019999796513324735, "loss": 1.8977, "step": 893 }, { "epoch": 0.03201604383404659, "grad_norm": 1.7978893518447876, "learning_rate": 0.00019999789046567203, "loss": 1.8772, "step": 894 }, { "epoch": 0.03205185596361488, "grad_norm": 2.5152993202209473, "learning_rate": 0.00019999781445275406, "loss": 1.9364, "step": 895 }, { "epoch": 0.03208766809318316, "grad_norm": 1.4267518520355225, "learning_rate": 0.0001999977370944945, "loss": 2.3957, "step": 896 }, { "epoch": 0.032123480222751445, "grad_norm": 1.907285451889038, "learning_rate": 0.00019999765839089434, "loss": 1.6652, "step": 897 }, { "epoch": 0.03215929235231973, "grad_norm": 1.9558316469192505, "learning_rate": 0.00019999757834195472, "loss": 1.9541, "step": 898 }, { "epoch": 0.03219510448188802, "grad_norm": 1.6896051168441772, "learning_rate": 0.00019999749694767666, "loss": 1.722, "step": 899 }, { "epoch": 0.0322309166114563, "grad_norm": 2.4404208660125732, "learning_rate": 0.0001999974142080612, "loss": 2.4644, "step": 900 }, { "epoch": 0.03226672874102458, "grad_norm": 1.4918023347854614, "learning_rate": 0.00019999733012310958, "loss": 1.9803, "step": 901 }, { "epoch": 0.03230254087059287, "grad_norm": 1.9694902896881104, "learning_rate": 0.00019999724469282288, "loss": 1.9535, "step": 902 }, { "epoch": 0.032338353000161156, "grad_norm": 1.778144359588623, "learning_rate": 0.00019999715791720223, "loss": 1.8126, "step": 903 }, { "epoch": 0.03237416512972944, "grad_norm": 1.5544754266738892, "learning_rate": 0.00019999706979624877, "loss": 2.0249, "step": 904 }, { "epoch": 0.03240997725929772, "grad_norm": 2.4764208793640137, "learning_rate": 0.00019999698032996377, "loss": 2.1015, "step": 905 }, { "epoch": 0.03244578938886601, "grad_norm": 1.926424503326416, "learning_rate": 0.00019999688951834836, "loss": 1.8125, "step": 906 }, { "epoch": 0.032481601518434294, "grad_norm": 2.0330772399902344, "learning_rate": 0.0001999967973614038, "loss": 2.0531, "step": 907 }, { "epoch": 0.03251741364800258, "grad_norm": 2.0009052753448486, "learning_rate": 0.00019999670385913133, "loss": 2.2191, "step": 908 }, { "epoch": 0.03255322577757086, "grad_norm": 1.3479782342910767, "learning_rate": 0.00019999660901153218, "loss": 1.7542, "step": 909 }, { "epoch": 0.03258903790713915, "grad_norm": 1.28345787525177, "learning_rate": 0.00019999651281860762, "loss": 1.9944, "step": 910 }, { "epoch": 0.03262485003670743, "grad_norm": 1.4357455968856812, "learning_rate": 0.00019999641528035898, "loss": 2.0591, "step": 911 }, { "epoch": 0.032660662166275715, "grad_norm": 1.6938787698745728, "learning_rate": 0.0001999963163967876, "loss": 2.2751, "step": 912 }, { "epoch": 0.032696474295844005, "grad_norm": 1.267437219619751, "learning_rate": 0.00019999621616789473, "loss": 1.8928, "step": 913 }, { "epoch": 0.03273228642541229, "grad_norm": 1.2574923038482666, "learning_rate": 0.00019999611459368174, "loss": 1.793, "step": 914 }, { "epoch": 0.03276809855498057, "grad_norm": 1.0232667922973633, "learning_rate": 0.00019999601167415006, "loss": 1.8391, "step": 915 }, { "epoch": 0.032803910684548854, "grad_norm": 2.3581461906433105, "learning_rate": 0.000199995907409301, "loss": 2.4764, "step": 916 }, { "epoch": 0.032839722814117144, "grad_norm": 1.5463016033172607, "learning_rate": 0.000199995801799136, "loss": 1.9629, "step": 917 }, { "epoch": 0.032875534943685426, "grad_norm": 2.3027689456939697, "learning_rate": 0.00019999569484365645, "loss": 1.9298, "step": 918 }, { "epoch": 0.03291134707325371, "grad_norm": 1.575325846672058, "learning_rate": 0.00019999558654286385, "loss": 1.8611, "step": 919 }, { "epoch": 0.032947159202822, "grad_norm": 2.7732012271881104, "learning_rate": 0.0001999954768967596, "loss": 2.2494, "step": 920 }, { "epoch": 0.03298297133239028, "grad_norm": 2.0338802337646484, "learning_rate": 0.0001999953659053452, "loss": 1.8992, "step": 921 }, { "epoch": 0.033018783461958565, "grad_norm": 1.5755022764205933, "learning_rate": 0.0001999952535686221, "loss": 2.1845, "step": 922 }, { "epoch": 0.03305459559152685, "grad_norm": 1.2108244895935059, "learning_rate": 0.00019999513988659188, "loss": 1.6432, "step": 923 }, { "epoch": 0.03309040772109514, "grad_norm": 2.210641860961914, "learning_rate": 0.00019999502485925605, "loss": 2.4049, "step": 924 }, { "epoch": 0.03312621985066342, "grad_norm": 1.6159656047821045, "learning_rate": 0.00019999490848661612, "loss": 1.6542, "step": 925 }, { "epoch": 0.0331620319802317, "grad_norm": 1.2654778957366943, "learning_rate": 0.00019999479076867368, "loss": 1.991, "step": 926 }, { "epoch": 0.03319784410979999, "grad_norm": 1.8192824125289917, "learning_rate": 0.00019999467170543031, "loss": 1.9816, "step": 927 }, { "epoch": 0.033233656239368276, "grad_norm": 1.5657546520233154, "learning_rate": 0.00019999455129688764, "loss": 1.8487, "step": 928 }, { "epoch": 0.03326946836893656, "grad_norm": 1.4876744747161865, "learning_rate": 0.00019999442954304729, "loss": 1.7911, "step": 929 }, { "epoch": 0.03330528049850484, "grad_norm": 1.8739250898361206, "learning_rate": 0.00019999430644391082, "loss": 1.9183, "step": 930 }, { "epoch": 0.03334109262807313, "grad_norm": 1.3608872890472412, "learning_rate": 0.00019999418199947994, "loss": 1.8478, "step": 931 }, { "epoch": 0.033376904757641414, "grad_norm": 1.5598634481430054, "learning_rate": 0.00019999405620975636, "loss": 2.0591, "step": 932 }, { "epoch": 0.0334127168872097, "grad_norm": 2.9095773696899414, "learning_rate": 0.00019999392907474174, "loss": 2.0628, "step": 933 }, { "epoch": 0.03344852901677798, "grad_norm": 1.6652559041976929, "learning_rate": 0.00019999380059443773, "loss": 1.9522, "step": 934 }, { "epoch": 0.03348434114634627, "grad_norm": 1.8389030694961548, "learning_rate": 0.00019999367076884616, "loss": 1.8165, "step": 935 }, { "epoch": 0.03352015327591455, "grad_norm": 1.5243996381759644, "learning_rate": 0.00019999353959796872, "loss": 1.9362, "step": 936 }, { "epoch": 0.033555965405482835, "grad_norm": 1.587996006011963, "learning_rate": 0.0001999934070818072, "loss": 1.9498, "step": 937 }, { "epoch": 0.033591777535051125, "grad_norm": 1.3211830854415894, "learning_rate": 0.00019999327322036336, "loss": 1.8008, "step": 938 }, { "epoch": 0.03362758966461941, "grad_norm": 1.775557518005371, "learning_rate": 0.00019999313801363902, "loss": 2.0888, "step": 939 }, { "epoch": 0.03366340179418769, "grad_norm": 1.5863924026489258, "learning_rate": 0.00019999300146163597, "loss": 1.7688, "step": 940 }, { "epoch": 0.03369921392375597, "grad_norm": 2.072439193725586, "learning_rate": 0.00019999286356435608, "loss": 1.9447, "step": 941 }, { "epoch": 0.03373502605332426, "grad_norm": 1.8205657005310059, "learning_rate": 0.0001999927243218012, "loss": 1.852, "step": 942 }, { "epoch": 0.033770838182892546, "grad_norm": 2.22773814201355, "learning_rate": 0.0001999925837339732, "loss": 1.898, "step": 943 }, { "epoch": 0.03380665031246083, "grad_norm": 1.3842288255691528, "learning_rate": 0.00019999244180087395, "loss": 2.0167, "step": 944 }, { "epoch": 0.03384246244202912, "grad_norm": 1.9286949634552002, "learning_rate": 0.00019999229852250537, "loss": 1.9268, "step": 945 }, { "epoch": 0.0338782745715974, "grad_norm": 2.1182754039764404, "learning_rate": 0.00019999215389886942, "loss": 2.0501, "step": 946 }, { "epoch": 0.033914086701165684, "grad_norm": 1.6029773950576782, "learning_rate": 0.000199992007929968, "loss": 1.9263, "step": 947 }, { "epoch": 0.03394989883073397, "grad_norm": 1.2110930681228638, "learning_rate": 0.0001999918606158031, "loss": 1.9129, "step": 948 }, { "epoch": 0.03398571096030226, "grad_norm": 1.507839322090149, "learning_rate": 0.0001999917119563767, "loss": 1.9622, "step": 949 }, { "epoch": 0.03402152308987054, "grad_norm": 1.6795588731765747, "learning_rate": 0.00019999156195169078, "loss": 1.864, "step": 950 }, { "epoch": 0.03405733521943882, "grad_norm": 1.9231281280517578, "learning_rate": 0.0001999914106017474, "loss": 2.1288, "step": 951 }, { "epoch": 0.03409314734900711, "grad_norm": 1.7240833044052124, "learning_rate": 0.00019999125790654855, "loss": 1.7942, "step": 952 }, { "epoch": 0.034128959478575395, "grad_norm": 1.4035639762878418, "learning_rate": 0.0001999911038660963, "loss": 1.9428, "step": 953 }, { "epoch": 0.03416477160814368, "grad_norm": 1.2519588470458984, "learning_rate": 0.00019999094848039274, "loss": 1.7459, "step": 954 }, { "epoch": 0.03420058373771196, "grad_norm": 1.201668620109558, "learning_rate": 0.00019999079174943995, "loss": 1.6607, "step": 955 }, { "epoch": 0.03423639586728025, "grad_norm": 1.6537754535675049, "learning_rate": 0.00019999063367324003, "loss": 2.0089, "step": 956 }, { "epoch": 0.03427220799684853, "grad_norm": 2.3439764976501465, "learning_rate": 0.0001999904742517951, "loss": 2.1381, "step": 957 }, { "epoch": 0.034308020126416816, "grad_norm": 1.219859004020691, "learning_rate": 0.00019999031348510733, "loss": 2.0814, "step": 958 }, { "epoch": 0.0343438322559851, "grad_norm": 1.969250202178955, "learning_rate": 0.00019999015137317887, "loss": 2.0864, "step": 959 }, { "epoch": 0.03437964438555339, "grad_norm": 2.401399850845337, "learning_rate": 0.0001999899879160119, "loss": 1.9438, "step": 960 }, { "epoch": 0.03441545651512167, "grad_norm": 1.4327278137207031, "learning_rate": 0.00019998982311360863, "loss": 1.9214, "step": 961 }, { "epoch": 0.034451268644689954, "grad_norm": 2.319035768508911, "learning_rate": 0.00019998965696597126, "loss": 2.3354, "step": 962 }, { "epoch": 0.034487080774258244, "grad_norm": 1.1964077949523926, "learning_rate": 0.00019998948947310202, "loss": 1.9042, "step": 963 }, { "epoch": 0.03452289290382653, "grad_norm": 1.8281431198120117, "learning_rate": 0.0001999893206350032, "loss": 1.9747, "step": 964 }, { "epoch": 0.03455870503339481, "grad_norm": 1.1979892253875732, "learning_rate": 0.00019998915045167702, "loss": 1.6347, "step": 965 }, { "epoch": 0.03459451716296309, "grad_norm": 1.382490873336792, "learning_rate": 0.0001999889789231258, "loss": 2.0574, "step": 966 }, { "epoch": 0.03463032929253138, "grad_norm": 1.8725450038909912, "learning_rate": 0.00019998880604935187, "loss": 1.9832, "step": 967 }, { "epoch": 0.034666141422099665, "grad_norm": 1.4053820371627808, "learning_rate": 0.00019998863183035752, "loss": 1.5536, "step": 968 }, { "epoch": 0.03470195355166795, "grad_norm": 2.361921787261963, "learning_rate": 0.0001999884562661451, "loss": 2.347, "step": 969 }, { "epoch": 0.03473776568123624, "grad_norm": 1.5455695390701294, "learning_rate": 0.00019998827935671697, "loss": 1.5136, "step": 970 }, { "epoch": 0.03477357781080452, "grad_norm": 1.388515830039978, "learning_rate": 0.00019998810110207553, "loss": 1.7991, "step": 971 }, { "epoch": 0.034809389940372804, "grad_norm": 1.5065860748291016, "learning_rate": 0.00019998792150222316, "loss": 2.0703, "step": 972 }, { "epoch": 0.034845202069941086, "grad_norm": 2.07222580909729, "learning_rate": 0.0001999877405571623, "loss": 1.9729, "step": 973 }, { "epoch": 0.034881014199509376, "grad_norm": 1.977397084236145, "learning_rate": 0.00019998755826689535, "loss": 2.313, "step": 974 }, { "epoch": 0.03491682632907766, "grad_norm": 1.2814414501190186, "learning_rate": 0.00019998737463142478, "loss": 1.7694, "step": 975 }, { "epoch": 0.03495263845864594, "grad_norm": 2.0901002883911133, "learning_rate": 0.00019998718965075305, "loss": 2.186, "step": 976 }, { "epoch": 0.03498845058821423, "grad_norm": 1.2248455286026, "learning_rate": 0.00019998700332488265, "loss": 1.7233, "step": 977 }, { "epoch": 0.035024262717782514, "grad_norm": 1.0053720474243164, "learning_rate": 0.00019998681565381611, "loss": 1.5713, "step": 978 }, { "epoch": 0.0350600748473508, "grad_norm": 1.413627028465271, "learning_rate": 0.00019998662663755595, "loss": 1.908, "step": 979 }, { "epoch": 0.03509588697691908, "grad_norm": 1.2800226211547852, "learning_rate": 0.00019998643627610466, "loss": 1.83, "step": 980 }, { "epoch": 0.03513169910648737, "grad_norm": 1.8141751289367676, "learning_rate": 0.00019998624456946492, "loss": 1.6135, "step": 981 }, { "epoch": 0.03516751123605565, "grad_norm": 1.8859864473342896, "learning_rate": 0.00019998605151763917, "loss": 2.0094, "step": 982 }, { "epoch": 0.035203323365623936, "grad_norm": 1.6842153072357178, "learning_rate": 0.00019998585712063008, "loss": 1.8373, "step": 983 }, { "epoch": 0.03523913549519222, "grad_norm": 1.4130045175552368, "learning_rate": 0.00019998566137844026, "loss": 2.2282, "step": 984 }, { "epoch": 0.03527494762476051, "grad_norm": 2.0987722873687744, "learning_rate": 0.00019998546429107235, "loss": 2.1985, "step": 985 }, { "epoch": 0.03531075975432879, "grad_norm": 1.7609018087387085, "learning_rate": 0.00019998526585852898, "loss": 2.0455, "step": 986 }, { "epoch": 0.035346571883897074, "grad_norm": 1.6820363998413086, "learning_rate": 0.00019998506608081282, "loss": 2.0726, "step": 987 }, { "epoch": 0.035382384013465364, "grad_norm": 2.3095192909240723, "learning_rate": 0.00019998486495792657, "loss": 1.9518, "step": 988 }, { "epoch": 0.035418196143033646, "grad_norm": 1.5085362195968628, "learning_rate": 0.00019998466248987294, "loss": 2.0056, "step": 989 }, { "epoch": 0.03545400827260193, "grad_norm": 1.2365782260894775, "learning_rate": 0.00019998445867665463, "loss": 2.0304, "step": 990 }, { "epoch": 0.03548982040217021, "grad_norm": 1.5272630453109741, "learning_rate": 0.0001999842535182744, "loss": 1.9765, "step": 991 }, { "epoch": 0.0355256325317385, "grad_norm": 1.3069766759872437, "learning_rate": 0.00019998404701473504, "loss": 2.0625, "step": 992 }, { "epoch": 0.035561444661306785, "grad_norm": 1.3852381706237793, "learning_rate": 0.00019998383916603927, "loss": 2.0252, "step": 993 }, { "epoch": 0.03559725679087507, "grad_norm": 2.0161495208740234, "learning_rate": 0.00019998362997218993, "loss": 2.197, "step": 994 }, { "epoch": 0.03563306892044336, "grad_norm": 1.4747849702835083, "learning_rate": 0.0001999834194331898, "loss": 2.02, "step": 995 }, { "epoch": 0.03566888105001164, "grad_norm": 2.3067164421081543, "learning_rate": 0.00019998320754904177, "loss": 2.6484, "step": 996 }, { "epoch": 0.03570469317957992, "grad_norm": 1.9584856033325195, "learning_rate": 0.0001999829943197486, "loss": 1.8812, "step": 997 }, { "epoch": 0.035740505309148206, "grad_norm": 1.4480103254318237, "learning_rate": 0.00019998277974531326, "loss": 1.9695, "step": 998 }, { "epoch": 0.035776317438716496, "grad_norm": 1.4160569906234741, "learning_rate": 0.00019998256382573856, "loss": 2.1651, "step": 999 }, { "epoch": 0.03581212956828478, "grad_norm": 1.8961275815963745, "learning_rate": 0.0001999823465610274, "loss": 2.2543, "step": 1000 }, { "epoch": 0.03584794169785306, "grad_norm": 1.6168714761734009, "learning_rate": 0.0001999821279511828, "loss": 1.8404, "step": 1001 }, { "epoch": 0.03588375382742135, "grad_norm": 1.4820830821990967, "learning_rate": 0.0001999819079962076, "loss": 1.7631, "step": 1002 }, { "epoch": 0.035919565956989634, "grad_norm": 1.6105800867080688, "learning_rate": 0.0001999816866961048, "loss": 2.187, "step": 1003 }, { "epoch": 0.03595537808655792, "grad_norm": 1.5409481525421143, "learning_rate": 0.00019998146405087738, "loss": 1.8704, "step": 1004 }, { "epoch": 0.0359911902161262, "grad_norm": 1.9356262683868408, "learning_rate": 0.00019998124006052832, "loss": 1.9801, "step": 1005 }, { "epoch": 0.03602700234569449, "grad_norm": 1.5342742204666138, "learning_rate": 0.00019998101472506064, "loss": 2.0569, "step": 1006 }, { "epoch": 0.03606281447526277, "grad_norm": 1.746179223060608, "learning_rate": 0.00019998078804447738, "loss": 2.0516, "step": 1007 }, { "epoch": 0.036098626604831055, "grad_norm": 1.6185777187347412, "learning_rate": 0.00019998056001878158, "loss": 1.9516, "step": 1008 }, { "epoch": 0.03613443873439934, "grad_norm": 1.815090537071228, "learning_rate": 0.0001999803306479763, "loss": 1.9385, "step": 1009 }, { "epoch": 0.03617025086396763, "grad_norm": 1.338623285293579, "learning_rate": 0.00019998009993206462, "loss": 1.8261, "step": 1010 }, { "epoch": 0.03620606299353591, "grad_norm": 1.5827144384384155, "learning_rate": 0.0001999798678710497, "loss": 1.7827, "step": 1011 }, { "epoch": 0.03624187512310419, "grad_norm": 1.5187708139419556, "learning_rate": 0.00019997963446493461, "loss": 2.2097, "step": 1012 }, { "epoch": 0.03627768725267248, "grad_norm": 1.234924554824829, "learning_rate": 0.00019997939971372252, "loss": 1.7321, "step": 1013 }, { "epoch": 0.036313499382240766, "grad_norm": 1.9228838682174683, "learning_rate": 0.00019997916361741655, "loss": 2.0538, "step": 1014 }, { "epoch": 0.03634931151180905, "grad_norm": 1.6233713626861572, "learning_rate": 0.0001999789261760199, "loss": 2.216, "step": 1015 }, { "epoch": 0.03638512364137733, "grad_norm": 1.3126624822616577, "learning_rate": 0.00019997868738953577, "loss": 1.7939, "step": 1016 }, { "epoch": 0.03642093577094562, "grad_norm": 1.9218897819519043, "learning_rate": 0.00019997844725796733, "loss": 2.1702, "step": 1017 }, { "epoch": 0.036456747900513904, "grad_norm": 1.6152637004852295, "learning_rate": 0.0001999782057813179, "loss": 1.9599, "step": 1018 }, { "epoch": 0.03649256003008219, "grad_norm": 1.2749489545822144, "learning_rate": 0.00019997796295959065, "loss": 1.9781, "step": 1019 }, { "epoch": 0.03652837215965048, "grad_norm": 1.346900224685669, "learning_rate": 0.00019997771879278883, "loss": 1.8683, "step": 1020 }, { "epoch": 0.03656418428921876, "grad_norm": 1.3836091756820679, "learning_rate": 0.00019997747328091584, "loss": 1.7748, "step": 1021 }, { "epoch": 0.03659999641878704, "grad_norm": 1.3248271942138672, "learning_rate": 0.00019997722642397484, "loss": 2.0045, "step": 1022 }, { "epoch": 0.036635808548355325, "grad_norm": 1.6735011339187622, "learning_rate": 0.00019997697822196926, "loss": 1.9793, "step": 1023 }, { "epoch": 0.036671620677923615, "grad_norm": 2.2710859775543213, "learning_rate": 0.00019997672867490238, "loss": 1.8652, "step": 1024 }, { "epoch": 0.0367074328074919, "grad_norm": 1.8319227695465088, "learning_rate": 0.0001999764777827776, "loss": 2.1901, "step": 1025 }, { "epoch": 0.03674324493706018, "grad_norm": 1.5663321018218994, "learning_rate": 0.00019997622554559824, "loss": 1.9499, "step": 1026 }, { "epoch": 0.03677905706662847, "grad_norm": 1.2792712450027466, "learning_rate": 0.00019997597196336775, "loss": 2.1186, "step": 1027 }, { "epoch": 0.03681486919619675, "grad_norm": 1.6958818435668945, "learning_rate": 0.00019997571703608952, "loss": 1.9744, "step": 1028 }, { "epoch": 0.036850681325765036, "grad_norm": 1.8845629692077637, "learning_rate": 0.00019997546076376695, "loss": 2.0421, "step": 1029 }, { "epoch": 0.03688649345533332, "grad_norm": 2.2948620319366455, "learning_rate": 0.00019997520314640356, "loss": 2.1033, "step": 1030 }, { "epoch": 0.03692230558490161, "grad_norm": 1.8859891891479492, "learning_rate": 0.00019997494418400272, "loss": 1.7958, "step": 1031 }, { "epoch": 0.03695811771446989, "grad_norm": 1.6800627708435059, "learning_rate": 0.00019997468387656796, "loss": 2.0049, "step": 1032 }, { "epoch": 0.036993929844038174, "grad_norm": 2.595055341720581, "learning_rate": 0.00019997442222410283, "loss": 2.2109, "step": 1033 }, { "epoch": 0.03702974197360646, "grad_norm": 1.660823941230774, "learning_rate": 0.0001999741592266108, "loss": 1.9758, "step": 1034 }, { "epoch": 0.03706555410317475, "grad_norm": 2.998129367828369, "learning_rate": 0.0001999738948840954, "loss": 1.9876, "step": 1035 }, { "epoch": 0.03710136623274303, "grad_norm": 1.9035344123840332, "learning_rate": 0.0001999736291965602, "loss": 2.2105, "step": 1036 }, { "epoch": 0.03713717836231131, "grad_norm": 1.6378567218780518, "learning_rate": 0.00019997336216400876, "loss": 1.985, "step": 1037 }, { "epoch": 0.0371729904918796, "grad_norm": 2.2616209983825684, "learning_rate": 0.00019997309378644472, "loss": 2.1327, "step": 1038 }, { "epoch": 0.037208802621447885, "grad_norm": 1.207955241203308, "learning_rate": 0.00019997282406387167, "loss": 1.7478, "step": 1039 }, { "epoch": 0.03724461475101617, "grad_norm": 1.3184400796890259, "learning_rate": 0.00019997255299629318, "loss": 1.6028, "step": 1040 }, { "epoch": 0.03728042688058445, "grad_norm": 1.752772569656372, "learning_rate": 0.00019997228058371298, "loss": 1.9437, "step": 1041 }, { "epoch": 0.03731623901015274, "grad_norm": 1.965004563331604, "learning_rate": 0.00019997200682613468, "loss": 2.4803, "step": 1042 }, { "epoch": 0.037352051139721024, "grad_norm": 1.6652560234069824, "learning_rate": 0.00019997173172356202, "loss": 2.1181, "step": 1043 }, { "epoch": 0.037387863269289306, "grad_norm": 1.3832405805587769, "learning_rate": 0.00019997145527599864, "loss": 2.0115, "step": 1044 }, { "epoch": 0.037423675398857596, "grad_norm": 1.9811400175094604, "learning_rate": 0.00019997117748344825, "loss": 2.0448, "step": 1045 }, { "epoch": 0.03745948752842588, "grad_norm": 1.222140908241272, "learning_rate": 0.00019997089834591466, "loss": 1.8386, "step": 1046 }, { "epoch": 0.03749529965799416, "grad_norm": 1.2641700506210327, "learning_rate": 0.00019997061786340158, "loss": 1.7789, "step": 1047 }, { "epoch": 0.037531111787562445, "grad_norm": 2.9230399131774902, "learning_rate": 0.00019997033603591277, "loss": 2.0249, "step": 1048 }, { "epoch": 0.037566923917130735, "grad_norm": 1.4298175573349, "learning_rate": 0.00019997005286345208, "loss": 1.6235, "step": 1049 }, { "epoch": 0.03760273604669902, "grad_norm": 2.0398781299591064, "learning_rate": 0.00019996976834602324, "loss": 2.2335, "step": 1050 }, { "epoch": 0.0376385481762673, "grad_norm": 2.0259313583374023, "learning_rate": 0.00019996948248363015, "loss": 2.1829, "step": 1051 }, { "epoch": 0.03767436030583559, "grad_norm": 1.503429651260376, "learning_rate": 0.0001999691952762766, "loss": 1.8359, "step": 1052 }, { "epoch": 0.03771017243540387, "grad_norm": 1.3695441484451294, "learning_rate": 0.00019996890672396652, "loss": 1.8916, "step": 1053 }, { "epoch": 0.037745984564972156, "grad_norm": 2.213430881500244, "learning_rate": 0.0001999686168267037, "loss": 2.0709, "step": 1054 }, { "epoch": 0.03778179669454044, "grad_norm": 1.7021877765655518, "learning_rate": 0.0001999683255844921, "loss": 1.8707, "step": 1055 }, { "epoch": 0.03781760882410873, "grad_norm": 1.7013872861862183, "learning_rate": 0.00019996803299733565, "loss": 1.8043, "step": 1056 }, { "epoch": 0.03785342095367701, "grad_norm": 2.1002440452575684, "learning_rate": 0.00019996773906523827, "loss": 1.7684, "step": 1057 }, { "epoch": 0.037889233083245294, "grad_norm": 1.6813231706619263, "learning_rate": 0.0001999674437882039, "loss": 1.9193, "step": 1058 }, { "epoch": 0.03792504521281358, "grad_norm": 1.3570369482040405, "learning_rate": 0.0001999671471662365, "loss": 1.7127, "step": 1059 }, { "epoch": 0.03796085734238187, "grad_norm": 3.0057036876678467, "learning_rate": 0.0001999668491993401, "loss": 2.5179, "step": 1060 }, { "epoch": 0.03799666947195015, "grad_norm": 1.5875788927078247, "learning_rate": 0.00019996654988751867, "loss": 1.9084, "step": 1061 }, { "epoch": 0.03803248160151843, "grad_norm": 4.081822395324707, "learning_rate": 0.0001999662492307763, "loss": 2.2503, "step": 1062 }, { "epoch": 0.03806829373108672, "grad_norm": 1.6430948972702026, "learning_rate": 0.000199965947229117, "loss": 1.6658, "step": 1063 }, { "epoch": 0.038104105860655005, "grad_norm": 1.858124017715454, "learning_rate": 0.0001999656438825448, "loss": 1.8926, "step": 1064 }, { "epoch": 0.03813991799022329, "grad_norm": 1.7443957328796387, "learning_rate": 0.0001999653391910638, "loss": 1.8012, "step": 1065 }, { "epoch": 0.03817573011979157, "grad_norm": 1.912501335144043, "learning_rate": 0.00019996503315467811, "loss": 1.8669, "step": 1066 }, { "epoch": 0.03821154224935986, "grad_norm": 1.8698126077651978, "learning_rate": 0.00019996472577339186, "loss": 2.3256, "step": 1067 }, { "epoch": 0.03824735437892814, "grad_norm": 1.444370985031128, "learning_rate": 0.00019996441704720917, "loss": 1.724, "step": 1068 }, { "epoch": 0.038283166508496426, "grad_norm": 2.0056838989257812, "learning_rate": 0.00019996410697613418, "loss": 1.8959, "step": 1069 }, { "epoch": 0.038318978638064716, "grad_norm": 1.4529732465744019, "learning_rate": 0.0001999637955601711, "loss": 2.0943, "step": 1070 }, { "epoch": 0.038354790767633, "grad_norm": 1.5077389478683472, "learning_rate": 0.00019996348279932406, "loss": 1.6368, "step": 1071 }, { "epoch": 0.03839060289720128, "grad_norm": 1.458603858947754, "learning_rate": 0.0001999631686935973, "loss": 2.1051, "step": 1072 }, { "epoch": 0.038426415026769564, "grad_norm": 1.7620093822479248, "learning_rate": 0.0001999628532429951, "loss": 2.2006, "step": 1073 }, { "epoch": 0.038462227156337854, "grad_norm": 1.673052430152893, "learning_rate": 0.00019996253644752158, "loss": 1.9259, "step": 1074 }, { "epoch": 0.03849803928590614, "grad_norm": 1.3855998516082764, "learning_rate": 0.00019996221830718115, "loss": 1.9441, "step": 1075 }, { "epoch": 0.03853385141547442, "grad_norm": 1.4624788761138916, "learning_rate": 0.00019996189882197797, "loss": 1.9249, "step": 1076 }, { "epoch": 0.0385696635450427, "grad_norm": 2.3567802906036377, "learning_rate": 0.0001999615779919164, "loss": 1.8877, "step": 1077 }, { "epoch": 0.03860547567461099, "grad_norm": 1.6427150964736938, "learning_rate": 0.0001999612558170007, "loss": 2.2207, "step": 1078 }, { "epoch": 0.038641287804179275, "grad_norm": 1.4462939500808716, "learning_rate": 0.0001999609322972353, "loss": 1.9105, "step": 1079 }, { "epoch": 0.03867709993374756, "grad_norm": 1.3780089616775513, "learning_rate": 0.00019996060743262447, "loss": 1.9752, "step": 1080 }, { "epoch": 0.03871291206331585, "grad_norm": 1.4172141551971436, "learning_rate": 0.00019996028122317257, "loss": 1.9202, "step": 1081 }, { "epoch": 0.03874872419288413, "grad_norm": 1.6056876182556152, "learning_rate": 0.00019995995366888408, "loss": 2.0305, "step": 1082 }, { "epoch": 0.03878453632245241, "grad_norm": 1.9504435062408447, "learning_rate": 0.00019995962476976336, "loss": 1.7623, "step": 1083 }, { "epoch": 0.038820348452020696, "grad_norm": 1.192026138305664, "learning_rate": 0.00019995929452581478, "loss": 2.051, "step": 1084 }, { "epoch": 0.038856160581588986, "grad_norm": 2.1449334621429443, "learning_rate": 0.00019995896293704285, "loss": 1.9216, "step": 1085 }, { "epoch": 0.03889197271115727, "grad_norm": 1.6051068305969238, "learning_rate": 0.00019995863000345202, "loss": 1.7794, "step": 1086 }, { "epoch": 0.03892778484072555, "grad_norm": 1.9584460258483887, "learning_rate": 0.00019995829572504677, "loss": 1.84, "step": 1087 }, { "epoch": 0.03896359697029384, "grad_norm": 1.3237011432647705, "learning_rate": 0.00019995796010183157, "loss": 1.9595, "step": 1088 }, { "epoch": 0.038999409099862124, "grad_norm": 1.275923728942871, "learning_rate": 0.00019995762313381095, "loss": 1.8077, "step": 1089 }, { "epoch": 0.03903522122943041, "grad_norm": 1.2776323556900024, "learning_rate": 0.00019995728482098945, "loss": 1.798, "step": 1090 }, { "epoch": 0.03907103335899869, "grad_norm": 1.3595540523529053, "learning_rate": 0.00019995694516337164, "loss": 2.0992, "step": 1091 }, { "epoch": 0.03910684548856698, "grad_norm": 1.3693207502365112, "learning_rate": 0.00019995660416096206, "loss": 1.9173, "step": 1092 }, { "epoch": 0.03914265761813526, "grad_norm": 1.6067368984222412, "learning_rate": 0.00019995626181376527, "loss": 1.9124, "step": 1093 }, { "epoch": 0.039178469747703545, "grad_norm": 1.5119096040725708, "learning_rate": 0.00019995591812178596, "loss": 2.2585, "step": 1094 }, { "epoch": 0.039214281877271835, "grad_norm": 1.692460060119629, "learning_rate": 0.00019995557308502866, "loss": 1.9322, "step": 1095 }, { "epoch": 0.03925009400684012, "grad_norm": 2.6956653594970703, "learning_rate": 0.00019995522670349808, "loss": 2.2339, "step": 1096 }, { "epoch": 0.0392859061364084, "grad_norm": 1.5072599649429321, "learning_rate": 0.00019995487897719888, "loss": 2.0304, "step": 1097 }, { "epoch": 0.039321718265976684, "grad_norm": 1.522534728050232, "learning_rate": 0.00019995452990613567, "loss": 1.7841, "step": 1098 }, { "epoch": 0.03935753039554497, "grad_norm": 3.1163241863250732, "learning_rate": 0.00019995417949031323, "loss": 2.0245, "step": 1099 }, { "epoch": 0.039393342525113256, "grad_norm": 2.7022454738616943, "learning_rate": 0.00019995382772973623, "loss": 2.064, "step": 1100 }, { "epoch": 0.03942915465468154, "grad_norm": 1.7741551399230957, "learning_rate": 0.00019995347462440938, "loss": 1.8704, "step": 1101 }, { "epoch": 0.03946496678424982, "grad_norm": 2.4735567569732666, "learning_rate": 0.0001999531201743375, "loss": 2.2503, "step": 1102 }, { "epoch": 0.03950077891381811, "grad_norm": 2.0130088329315186, "learning_rate": 0.0001999527643795253, "loss": 2.0751, "step": 1103 }, { "epoch": 0.039536591043386395, "grad_norm": 1.6272753477096558, "learning_rate": 0.00019995240723997757, "loss": 2.1966, "step": 1104 }, { "epoch": 0.03957240317295468, "grad_norm": 1.5757356882095337, "learning_rate": 0.00019995204875569914, "loss": 1.7593, "step": 1105 }, { "epoch": 0.03960821530252297, "grad_norm": 1.7916936874389648, "learning_rate": 0.00019995168892669485, "loss": 1.9154, "step": 1106 }, { "epoch": 0.03964402743209125, "grad_norm": 1.6669237613677979, "learning_rate": 0.00019995132775296948, "loss": 2.0772, "step": 1107 }, { "epoch": 0.03967983956165953, "grad_norm": 1.3182016611099243, "learning_rate": 0.00019995096523452795, "loss": 1.6875, "step": 1108 }, { "epoch": 0.039715651691227816, "grad_norm": 1.801761269569397, "learning_rate": 0.0001999506013713751, "loss": 1.8703, "step": 1109 }, { "epoch": 0.039751463820796105, "grad_norm": 1.1991573572158813, "learning_rate": 0.0001999502361635158, "loss": 2.0746, "step": 1110 }, { "epoch": 0.03978727595036439, "grad_norm": 1.6609413623809814, "learning_rate": 0.00019994986961095504, "loss": 2.0265, "step": 1111 }, { "epoch": 0.03982308807993267, "grad_norm": 1.425610065460205, "learning_rate": 0.0001999495017136977, "loss": 1.757, "step": 1112 }, { "epoch": 0.03985890020950096, "grad_norm": 1.956095576286316, "learning_rate": 0.00019994913247174876, "loss": 2.0188, "step": 1113 }, { "epoch": 0.039894712339069244, "grad_norm": 1.771166205406189, "learning_rate": 0.00019994876188511314, "loss": 2.2367, "step": 1114 }, { "epoch": 0.03993052446863753, "grad_norm": 1.6214959621429443, "learning_rate": 0.00019994838995379585, "loss": 1.8904, "step": 1115 }, { "epoch": 0.03996633659820581, "grad_norm": 1.3988220691680908, "learning_rate": 0.0001999480166778019, "loss": 1.7266, "step": 1116 }, { "epoch": 0.0400021487277741, "grad_norm": 1.1129361391067505, "learning_rate": 0.00019994764205713631, "loss": 1.9944, "step": 1117 }, { "epoch": 0.04003796085734238, "grad_norm": 1.3052695989608765, "learning_rate": 0.00019994726609180415, "loss": 1.8591, "step": 1118 }, { "epoch": 0.040073772986910665, "grad_norm": 1.9215950965881348, "learning_rate": 0.00019994688878181044, "loss": 2.0272, "step": 1119 }, { "epoch": 0.040109585116478955, "grad_norm": 1.3812592029571533, "learning_rate": 0.0001999465101271602, "loss": 1.8704, "step": 1120 }, { "epoch": 0.04014539724604724, "grad_norm": 1.5294747352600098, "learning_rate": 0.00019994613012785868, "loss": 1.8968, "step": 1121 }, { "epoch": 0.04018120937561552, "grad_norm": 1.8965163230895996, "learning_rate": 0.00019994574878391084, "loss": 2.0483, "step": 1122 }, { "epoch": 0.0402170215051838, "grad_norm": 1.571008324623108, "learning_rate": 0.00019994536609532187, "loss": 1.5518, "step": 1123 }, { "epoch": 0.04025283363475209, "grad_norm": 2.02764630317688, "learning_rate": 0.00019994498206209695, "loss": 1.5406, "step": 1124 }, { "epoch": 0.040288645764320376, "grad_norm": 1.47233247756958, "learning_rate": 0.0001999445966842412, "loss": 1.9526, "step": 1125 }, { "epoch": 0.04032445789388866, "grad_norm": 1.4829063415527344, "learning_rate": 0.00019994420996175983, "loss": 1.8804, "step": 1126 }, { "epoch": 0.04036027002345694, "grad_norm": 1.8773620128631592, "learning_rate": 0.00019994382189465802, "loss": 2.0499, "step": 1127 }, { "epoch": 0.04039608215302523, "grad_norm": 2.0403313636779785, "learning_rate": 0.000199943432482941, "loss": 1.7639, "step": 1128 }, { "epoch": 0.040431894282593514, "grad_norm": 2.01694917678833, "learning_rate": 0.00019994304172661403, "loss": 2.5179, "step": 1129 }, { "epoch": 0.0404677064121618, "grad_norm": 1.556822419166565, "learning_rate": 0.00019994264962568234, "loss": 2.0218, "step": 1130 }, { "epoch": 0.04050351854173009, "grad_norm": 2.926945924758911, "learning_rate": 0.00019994225618015125, "loss": 2.1212, "step": 1131 }, { "epoch": 0.04053933067129837, "grad_norm": 2.010427713394165, "learning_rate": 0.000199941861390026, "loss": 2.0122, "step": 1132 }, { "epoch": 0.04057514280086665, "grad_norm": 1.425561547279358, "learning_rate": 0.0001999414652553119, "loss": 1.9191, "step": 1133 }, { "epoch": 0.040610954930434935, "grad_norm": 1.2319929599761963, "learning_rate": 0.00019994106777601432, "loss": 1.7172, "step": 1134 }, { "epoch": 0.040646767060003225, "grad_norm": 1.7433674335479736, "learning_rate": 0.00019994066895213857, "loss": 1.8323, "step": 1135 }, { "epoch": 0.04068257918957151, "grad_norm": 1.2644333839416504, "learning_rate": 0.00019994026878369003, "loss": 1.8999, "step": 1136 }, { "epoch": 0.04071839131913979, "grad_norm": 1.5659446716308594, "learning_rate": 0.00019993986727067414, "loss": 1.7909, "step": 1137 }, { "epoch": 0.04075420344870808, "grad_norm": 2.269122362136841, "learning_rate": 0.0001999394644130962, "loss": 2.0758, "step": 1138 }, { "epoch": 0.04079001557827636, "grad_norm": 1.7199034690856934, "learning_rate": 0.00019993906021096168, "loss": 1.8833, "step": 1139 }, { "epoch": 0.040825827707844646, "grad_norm": 1.6728589534759521, "learning_rate": 0.00019993865466427603, "loss": 1.8298, "step": 1140 }, { "epoch": 0.04086163983741293, "grad_norm": 2.495798349380493, "learning_rate": 0.00019993824777304469, "loss": 1.639, "step": 1141 }, { "epoch": 0.04089745196698122, "grad_norm": 1.8760560750961304, "learning_rate": 0.0001999378395372731, "loss": 1.5388, "step": 1142 }, { "epoch": 0.0409332640965495, "grad_norm": 1.9759643077850342, "learning_rate": 0.00019993742995696686, "loss": 2.1794, "step": 1143 }, { "epoch": 0.040969076226117784, "grad_norm": 1.5241389274597168, "learning_rate": 0.0001999370190321314, "loss": 1.9345, "step": 1144 }, { "epoch": 0.041004888355686074, "grad_norm": 1.6700853109359741, "learning_rate": 0.0001999366067627722, "loss": 1.6506, "step": 1145 }, { "epoch": 0.04104070048525436, "grad_norm": 1.6093263626098633, "learning_rate": 0.0001999361931488949, "loss": 1.9827, "step": 1146 }, { "epoch": 0.04107651261482264, "grad_norm": 1.726548194885254, "learning_rate": 0.00019993577819050505, "loss": 1.9425, "step": 1147 }, { "epoch": 0.04111232474439092, "grad_norm": 1.4879534244537354, "learning_rate": 0.00019993536188760817, "loss": 1.8571, "step": 1148 }, { "epoch": 0.04114813687395921, "grad_norm": 2.0006051063537598, "learning_rate": 0.00019993494424020992, "loss": 1.7725, "step": 1149 }, { "epoch": 0.041183949003527495, "grad_norm": 1.6221108436584473, "learning_rate": 0.00019993452524831592, "loss": 1.8815, "step": 1150 }, { "epoch": 0.04121976113309578, "grad_norm": 1.5773656368255615, "learning_rate": 0.0001999341049119318, "loss": 1.7668, "step": 1151 }, { "epoch": 0.04125557326266406, "grad_norm": 1.7638592720031738, "learning_rate": 0.00019993368323106315, "loss": 1.9433, "step": 1152 }, { "epoch": 0.04129138539223235, "grad_norm": 1.4809341430664062, "learning_rate": 0.0001999332602057157, "loss": 1.6965, "step": 1153 }, { "epoch": 0.041327197521800633, "grad_norm": 1.7813769578933716, "learning_rate": 0.0001999328358358952, "loss": 1.8184, "step": 1154 }, { "epoch": 0.041363009651368916, "grad_norm": 1.9684855937957764, "learning_rate": 0.00019993241012160727, "loss": 1.9786, "step": 1155 }, { "epoch": 0.041398821780937206, "grad_norm": 1.7000457048416138, "learning_rate": 0.00019993198306285766, "loss": 1.742, "step": 1156 }, { "epoch": 0.04143463391050549, "grad_norm": 1.2915273904800415, "learning_rate": 0.0001999315546596521, "loss": 1.774, "step": 1157 }, { "epoch": 0.04147044604007377, "grad_norm": 1.3124654293060303, "learning_rate": 0.0001999311249119964, "loss": 1.9188, "step": 1158 }, { "epoch": 0.041506258169642055, "grad_norm": 1.339920997619629, "learning_rate": 0.0001999306938198963, "loss": 1.9007, "step": 1159 }, { "epoch": 0.041542070299210344, "grad_norm": 1.8238441944122314, "learning_rate": 0.00019993026138335763, "loss": 2.3424, "step": 1160 }, { "epoch": 0.04157788242877863, "grad_norm": 1.7708998918533325, "learning_rate": 0.0001999298276023862, "loss": 2.0156, "step": 1161 }, { "epoch": 0.04161369455834691, "grad_norm": 1.3194271326065063, "learning_rate": 0.00019992939247698784, "loss": 1.7528, "step": 1162 }, { "epoch": 0.0416495066879152, "grad_norm": 1.9072855710983276, "learning_rate": 0.00019992895600716838, "loss": 1.944, "step": 1163 }, { "epoch": 0.04168531881748348, "grad_norm": 1.2703431844711304, "learning_rate": 0.00019992851819293373, "loss": 1.8563, "step": 1164 }, { "epoch": 0.041721130947051766, "grad_norm": 1.5858049392700195, "learning_rate": 0.00019992807903428976, "loss": 2.0275, "step": 1165 }, { "epoch": 0.04175694307662005, "grad_norm": 1.2060877084732056, "learning_rate": 0.0001999276385312424, "loss": 1.811, "step": 1166 }, { "epoch": 0.04179275520618834, "grad_norm": 1.198927640914917, "learning_rate": 0.00019992719668379753, "loss": 1.9143, "step": 1167 }, { "epoch": 0.04182856733575662, "grad_norm": 2.0849688053131104, "learning_rate": 0.00019992675349196114, "loss": 2.4221, "step": 1168 }, { "epoch": 0.041864379465324904, "grad_norm": 1.9343287944793701, "learning_rate": 0.0001999263089557392, "loss": 2.047, "step": 1169 }, { "epoch": 0.041900191594893194, "grad_norm": 1.5256718397140503, "learning_rate": 0.00019992586307513767, "loss": 2.1052, "step": 1170 }, { "epoch": 0.041936003724461476, "grad_norm": 1.3801934719085693, "learning_rate": 0.00019992541585016254, "loss": 1.6852, "step": 1171 }, { "epoch": 0.04197181585402976, "grad_norm": 1.4266293048858643, "learning_rate": 0.0001999249672808198, "loss": 1.9, "step": 1172 }, { "epoch": 0.04200762798359804, "grad_norm": 1.2615375518798828, "learning_rate": 0.00019992451736711554, "loss": 2.0813, "step": 1173 }, { "epoch": 0.04204344011316633, "grad_norm": 1.4383363723754883, "learning_rate": 0.00019992406610905582, "loss": 1.6228, "step": 1174 }, { "epoch": 0.042079252242734615, "grad_norm": 1.2527996301651, "learning_rate": 0.00019992361350664663, "loss": 1.8077, "step": 1175 }, { "epoch": 0.0421150643723029, "grad_norm": 1.6648797988891602, "learning_rate": 0.00019992315955989415, "loss": 2.051, "step": 1176 }, { "epoch": 0.04215087650187118, "grad_norm": 1.7546112537384033, "learning_rate": 0.00019992270426880446, "loss": 1.871, "step": 1177 }, { "epoch": 0.04218668863143947, "grad_norm": 1.7627754211425781, "learning_rate": 0.00019992224763338366, "loss": 1.7231, "step": 1178 }, { "epoch": 0.04222250076100775, "grad_norm": 1.8482389450073242, "learning_rate": 0.00019992178965363787, "loss": 1.698, "step": 1179 }, { "epoch": 0.042258312890576036, "grad_norm": 1.2556909322738647, "learning_rate": 0.00019992133032957336, "loss": 1.9038, "step": 1180 }, { "epoch": 0.042294125020144326, "grad_norm": 1.856414794921875, "learning_rate": 0.0001999208696611962, "loss": 1.7274, "step": 1181 }, { "epoch": 0.04232993714971261, "grad_norm": 1.5887939929962158, "learning_rate": 0.00019992040764851263, "loss": 1.7081, "step": 1182 }, { "epoch": 0.04236574927928089, "grad_norm": 1.9165586233139038, "learning_rate": 0.00019991994429152888, "loss": 1.8554, "step": 1183 }, { "epoch": 0.042401561408849174, "grad_norm": 2.281977653503418, "learning_rate": 0.00019991947959025112, "loss": 2.5105, "step": 1184 }, { "epoch": 0.042437373538417464, "grad_norm": 1.645222544670105, "learning_rate": 0.0001999190135446857, "loss": 2.0212, "step": 1185 }, { "epoch": 0.04247318566798575, "grad_norm": 1.8959003686904907, "learning_rate": 0.00019991854615483882, "loss": 2.2363, "step": 1186 }, { "epoch": 0.04250899779755403, "grad_norm": 1.5478854179382324, "learning_rate": 0.00019991807742071678, "loss": 1.9947, "step": 1187 }, { "epoch": 0.04254480992712232, "grad_norm": 1.8765732049942017, "learning_rate": 0.0001999176073423259, "loss": 1.7691, "step": 1188 }, { "epoch": 0.0425806220566906, "grad_norm": 1.8564085960388184, "learning_rate": 0.00019991713591967252, "loss": 1.7588, "step": 1189 }, { "epoch": 0.042616434186258885, "grad_norm": 1.8056801557540894, "learning_rate": 0.00019991666315276292, "loss": 2.027, "step": 1190 }, { "epoch": 0.04265224631582717, "grad_norm": 1.8482861518859863, "learning_rate": 0.0001999161890416035, "loss": 1.8143, "step": 1191 }, { "epoch": 0.04268805844539546, "grad_norm": 1.6742010116577148, "learning_rate": 0.00019991571358620068, "loss": 2.162, "step": 1192 }, { "epoch": 0.04272387057496374, "grad_norm": 1.4236180782318115, "learning_rate": 0.0001999152367865608, "loss": 1.8787, "step": 1193 }, { "epoch": 0.04275968270453202, "grad_norm": 1.2990878820419312, "learning_rate": 0.0001999147586426903, "loss": 1.8382, "step": 1194 }, { "epoch": 0.04279549483410031, "grad_norm": 1.8911601305007935, "learning_rate": 0.00019991427915459558, "loss": 1.8486, "step": 1195 }, { "epoch": 0.042831306963668596, "grad_norm": 1.2889195680618286, "learning_rate": 0.0001999137983222831, "loss": 1.9353, "step": 1196 }, { "epoch": 0.04286711909323688, "grad_norm": 1.7363015413284302, "learning_rate": 0.0001999133161457594, "loss": 1.7446, "step": 1197 }, { "epoch": 0.04290293122280516, "grad_norm": 1.3156371116638184, "learning_rate": 0.00019991283262503083, "loss": 1.9038, "step": 1198 }, { "epoch": 0.04293874335237345, "grad_norm": 1.2407479286193848, "learning_rate": 0.00019991234776010406, "loss": 1.8502, "step": 1199 }, { "epoch": 0.042974555481941734, "grad_norm": 1.3936583995819092, "learning_rate": 0.0001999118615509855, "loss": 1.5118, "step": 1200 }, { "epoch": 0.04301036761151002, "grad_norm": 1.2559468746185303, "learning_rate": 0.00019991137399768166, "loss": 1.9056, "step": 1201 }, { "epoch": 0.0430461797410783, "grad_norm": 2.0051515102386475, "learning_rate": 0.00019991088510019924, "loss": 1.8493, "step": 1202 }, { "epoch": 0.04308199187064659, "grad_norm": 1.2575451135635376, "learning_rate": 0.0001999103948585447, "loss": 1.4424, "step": 1203 }, { "epoch": 0.04311780400021487, "grad_norm": 1.596622109413147, "learning_rate": 0.00019990990327272467, "loss": 2.141, "step": 1204 }, { "epoch": 0.043153616129783155, "grad_norm": 1.6816015243530273, "learning_rate": 0.00019990941034274577, "loss": 1.962, "step": 1205 }, { "epoch": 0.043189428259351445, "grad_norm": 1.785096287727356, "learning_rate": 0.00019990891606861463, "loss": 1.5008, "step": 1206 }, { "epoch": 0.04322524038891973, "grad_norm": 1.742447853088379, "learning_rate": 0.0001999084204503379, "loss": 1.8909, "step": 1207 }, { "epoch": 0.04326105251848801, "grad_norm": 1.6396584510803223, "learning_rate": 0.00019990792348792224, "loss": 1.8476, "step": 1208 }, { "epoch": 0.043296864648056294, "grad_norm": 1.9370383024215698, "learning_rate": 0.00019990742518137436, "loss": 2.1081, "step": 1209 }, { "epoch": 0.04333267677762458, "grad_norm": 2.301330089569092, "learning_rate": 0.00019990692553070093, "loss": 2.0177, "step": 1210 }, { "epoch": 0.043368488907192866, "grad_norm": 1.2723262310028076, "learning_rate": 0.0001999064245359087, "loss": 1.6058, "step": 1211 }, { "epoch": 0.04340430103676115, "grad_norm": 1.3464289903640747, "learning_rate": 0.00019990592219700437, "loss": 1.7071, "step": 1212 }, { "epoch": 0.04344011316632944, "grad_norm": 1.548187017440796, "learning_rate": 0.00019990541851399476, "loss": 1.9317, "step": 1213 }, { "epoch": 0.04347592529589772, "grad_norm": 1.9910813570022583, "learning_rate": 0.00019990491348688657, "loss": 2.1553, "step": 1214 }, { "epoch": 0.043511737425466004, "grad_norm": 1.2897824048995972, "learning_rate": 0.00019990440711568666, "loss": 1.831, "step": 1215 }, { "epoch": 0.04354754955503429, "grad_norm": 1.9216833114624023, "learning_rate": 0.00019990389940040184, "loss": 1.9973, "step": 1216 }, { "epoch": 0.04358336168460258, "grad_norm": 2.3305580615997314, "learning_rate": 0.0001999033903410389, "loss": 1.934, "step": 1217 }, { "epoch": 0.04361917381417086, "grad_norm": 1.5975700616836548, "learning_rate": 0.00019990287993760473, "loss": 1.829, "step": 1218 }, { "epoch": 0.04365498594373914, "grad_norm": 1.2177056074142456, "learning_rate": 0.00019990236819010615, "loss": 1.7145, "step": 1219 }, { "epoch": 0.04369079807330743, "grad_norm": 1.501842737197876, "learning_rate": 0.0001999018550985501, "loss": 1.9816, "step": 1220 }, { "epoch": 0.043726610202875715, "grad_norm": 2.3533644676208496, "learning_rate": 0.00019990134066294338, "loss": 2.2937, "step": 1221 }, { "epoch": 0.043762422332444, "grad_norm": 1.9165318012237549, "learning_rate": 0.00019990082488329308, "loss": 2.0315, "step": 1222 }, { "epoch": 0.04379823446201228, "grad_norm": 1.4523035287857056, "learning_rate": 0.000199900307759606, "loss": 2.2864, "step": 1223 }, { "epoch": 0.04383404659158057, "grad_norm": 1.336516261100769, "learning_rate": 0.00019989978929188914, "loss": 1.9243, "step": 1224 }, { "epoch": 0.043869858721148854, "grad_norm": 1.738339900970459, "learning_rate": 0.00019989926948014945, "loss": 1.7763, "step": 1225 }, { "epoch": 0.043905670850717136, "grad_norm": 2.4466097354888916, "learning_rate": 0.000199898748324394, "loss": 2.1759, "step": 1226 }, { "epoch": 0.04394148298028542, "grad_norm": 1.6366623640060425, "learning_rate": 0.00019989822582462972, "loss": 1.8826, "step": 1227 }, { "epoch": 0.04397729510985371, "grad_norm": 1.563489556312561, "learning_rate": 0.00019989770198086367, "loss": 1.6678, "step": 1228 }, { "epoch": 0.04401310723942199, "grad_norm": 1.8374176025390625, "learning_rate": 0.0001998971767931029, "loss": 1.7684, "step": 1229 }, { "epoch": 0.044048919368990275, "grad_norm": 1.9586941003799438, "learning_rate": 0.0001998966502613545, "loss": 2.132, "step": 1230 }, { "epoch": 0.044084731498558564, "grad_norm": 1.2699816226959229, "learning_rate": 0.0001998961223856255, "loss": 1.9222, "step": 1231 }, { "epoch": 0.04412054362812685, "grad_norm": 1.6662757396697998, "learning_rate": 0.00019989559316592305, "loss": 1.6608, "step": 1232 }, { "epoch": 0.04415635575769513, "grad_norm": 1.3291850090026855, "learning_rate": 0.00019989506260225426, "loss": 2.0618, "step": 1233 }, { "epoch": 0.04419216788726341, "grad_norm": 2.277132272720337, "learning_rate": 0.00019989453069462623, "loss": 1.7717, "step": 1234 }, { "epoch": 0.0442279800168317, "grad_norm": 1.6302454471588135, "learning_rate": 0.00019989399744304616, "loss": 2.0715, "step": 1235 }, { "epoch": 0.044263792146399986, "grad_norm": 1.285477876663208, "learning_rate": 0.0001998934628475212, "loss": 1.5437, "step": 1236 }, { "epoch": 0.04429960427596827, "grad_norm": 2.632361888885498, "learning_rate": 0.00019989292690805854, "loss": 1.8132, "step": 1237 }, { "epoch": 0.04433541640553656, "grad_norm": 1.5045816898345947, "learning_rate": 0.00019989238962466542, "loss": 1.8698, "step": 1238 }, { "epoch": 0.04437122853510484, "grad_norm": 1.6610292196273804, "learning_rate": 0.00019989185099734903, "loss": 1.8337, "step": 1239 }, { "epoch": 0.044407040664673124, "grad_norm": 1.562566876411438, "learning_rate": 0.00019989131102611667, "loss": 1.9222, "step": 1240 }, { "epoch": 0.04444285279424141, "grad_norm": 2.671149969100952, "learning_rate": 0.00019989076971097555, "loss": 1.8991, "step": 1241 }, { "epoch": 0.044478664923809696, "grad_norm": 2.1588051319122314, "learning_rate": 0.00019989022705193299, "loss": 1.9221, "step": 1242 }, { "epoch": 0.04451447705337798, "grad_norm": 1.949323296546936, "learning_rate": 0.00019988968304899624, "loss": 2.0363, "step": 1243 }, { "epoch": 0.04455028918294626, "grad_norm": 1.7885140180587769, "learning_rate": 0.00019988913770217269, "loss": 1.8394, "step": 1244 }, { "epoch": 0.04458610131251455, "grad_norm": 1.935902714729309, "learning_rate": 0.00019988859101146962, "loss": 2.0142, "step": 1245 }, { "epoch": 0.044621913442082835, "grad_norm": 1.9690736532211304, "learning_rate": 0.00019988804297689438, "loss": 1.8359, "step": 1246 }, { "epoch": 0.04465772557165112, "grad_norm": 1.5136144161224365, "learning_rate": 0.0001998874935984544, "loss": 1.9559, "step": 1247 }, { "epoch": 0.0446935377012194, "grad_norm": 1.742443561553955, "learning_rate": 0.00019988694287615704, "loss": 1.9451, "step": 1248 }, { "epoch": 0.04472934983078769, "grad_norm": 1.5419561862945557, "learning_rate": 0.0001998863908100097, "loss": 1.7377, "step": 1249 }, { "epoch": 0.04476516196035597, "grad_norm": 1.9197380542755127, "learning_rate": 0.00019988583740001984, "loss": 1.7331, "step": 1250 }, { "epoch": 0.044800974089924256, "grad_norm": 2.089766502380371, "learning_rate": 0.00019988528264619485, "loss": 1.8769, "step": 1251 }, { "epoch": 0.04483678621949254, "grad_norm": 1.1897839307785034, "learning_rate": 0.00019988472654854222, "loss": 1.9805, "step": 1252 }, { "epoch": 0.04487259834906083, "grad_norm": 2.2890515327453613, "learning_rate": 0.00019988416910706947, "loss": 2.2311, "step": 1253 }, { "epoch": 0.04490841047862911, "grad_norm": 1.2708755731582642, "learning_rate": 0.00019988361032178403, "loss": 1.7924, "step": 1254 }, { "epoch": 0.044944222608197394, "grad_norm": 1.8061339855194092, "learning_rate": 0.00019988305019269346, "loss": 1.7318, "step": 1255 }, { "epoch": 0.044980034737765684, "grad_norm": 1.5179500579833984, "learning_rate": 0.00019988248871980532, "loss": 1.6443, "step": 1256 }, { "epoch": 0.04501584686733397, "grad_norm": 1.4282045364379883, "learning_rate": 0.0001998819259031271, "loss": 1.8233, "step": 1257 }, { "epoch": 0.04505165899690225, "grad_norm": 1.1604241132736206, "learning_rate": 0.00019988136174266643, "loss": 1.8747, "step": 1258 }, { "epoch": 0.04508747112647053, "grad_norm": 2.2839112281799316, "learning_rate": 0.00019988079623843087, "loss": 2.0667, "step": 1259 }, { "epoch": 0.04512328325603882, "grad_norm": 2.5396506786346436, "learning_rate": 0.000199880229390428, "loss": 1.9539, "step": 1260 }, { "epoch": 0.045159095385607105, "grad_norm": 1.8665293455123901, "learning_rate": 0.00019987966119866554, "loss": 1.7051, "step": 1261 }, { "epoch": 0.04519490751517539, "grad_norm": 2.028208017349243, "learning_rate": 0.00019987909166315103, "loss": 1.7659, "step": 1262 }, { "epoch": 0.04523071964474368, "grad_norm": 2.1394529342651367, "learning_rate": 0.00019987852078389218, "loss": 1.6273, "step": 1263 }, { "epoch": 0.04526653177431196, "grad_norm": 2.826866626739502, "learning_rate": 0.00019987794856089668, "loss": 2.0664, "step": 1264 }, { "epoch": 0.04530234390388024, "grad_norm": 1.5801968574523926, "learning_rate": 0.0001998773749941722, "loss": 2.1104, "step": 1265 }, { "epoch": 0.045338156033448526, "grad_norm": 1.9282563924789429, "learning_rate": 0.00019987680008372647, "loss": 1.9696, "step": 1266 }, { "epoch": 0.045373968163016816, "grad_norm": 2.0032389163970947, "learning_rate": 0.00019987622382956722, "loss": 1.8998, "step": 1267 }, { "epoch": 0.0454097802925851, "grad_norm": 1.5961376428604126, "learning_rate": 0.00019987564623170226, "loss": 1.9436, "step": 1268 }, { "epoch": 0.04544559242215338, "grad_norm": 1.54341721534729, "learning_rate": 0.00019987506729013927, "loss": 1.8716, "step": 1269 }, { "epoch": 0.04548140455172167, "grad_norm": 2.1734094619750977, "learning_rate": 0.0001998744870048861, "loss": 1.7878, "step": 1270 }, { "epoch": 0.045517216681289954, "grad_norm": 1.2922240495681763, "learning_rate": 0.0001998739053759505, "loss": 1.7814, "step": 1271 }, { "epoch": 0.04555302881085824, "grad_norm": 1.7018311023712158, "learning_rate": 0.00019987332240334037, "loss": 1.9387, "step": 1272 }, { "epoch": 0.04558884094042652, "grad_norm": 1.7782323360443115, "learning_rate": 0.00019987273808706347, "loss": 1.7283, "step": 1273 }, { "epoch": 0.04562465306999481, "grad_norm": 1.5118334293365479, "learning_rate": 0.00019987215242712775, "loss": 1.7824, "step": 1274 }, { "epoch": 0.04566046519956309, "grad_norm": 1.525036096572876, "learning_rate": 0.00019987156542354103, "loss": 1.7238, "step": 1275 }, { "epoch": 0.045696277329131375, "grad_norm": 1.968327522277832, "learning_rate": 0.00019987097707631124, "loss": 2.0203, "step": 1276 }, { "epoch": 0.04573208945869966, "grad_norm": 1.6078931093215942, "learning_rate": 0.00019987038738544625, "loss": 1.4283, "step": 1277 }, { "epoch": 0.04576790158826795, "grad_norm": 1.5513797998428345, "learning_rate": 0.00019986979635095402, "loss": 1.811, "step": 1278 }, { "epoch": 0.04580371371783623, "grad_norm": 1.5902775526046753, "learning_rate": 0.00019986920397284253, "loss": 1.8791, "step": 1279 }, { "epoch": 0.045839525847404514, "grad_norm": 1.7717511653900146, "learning_rate": 0.0001998686102511197, "loss": 2.1192, "step": 1280 }, { "epoch": 0.0458753379769728, "grad_norm": 1.5827869176864624, "learning_rate": 0.00019986801518579353, "loss": 1.8253, "step": 1281 }, { "epoch": 0.045911150106541086, "grad_norm": 2.1853511333465576, "learning_rate": 0.00019986741877687207, "loss": 1.8936, "step": 1282 }, { "epoch": 0.04594696223610937, "grad_norm": 2.5177838802337646, "learning_rate": 0.00019986682102436328, "loss": 1.8655, "step": 1283 }, { "epoch": 0.04598277436567765, "grad_norm": 1.5945955514907837, "learning_rate": 0.00019986622192827525, "loss": 2.0518, "step": 1284 }, { "epoch": 0.04601858649524594, "grad_norm": 1.603678584098816, "learning_rate": 0.000199865621488616, "loss": 2.0015, "step": 1285 }, { "epoch": 0.046054398624814225, "grad_norm": 1.593038558959961, "learning_rate": 0.00019986501970539367, "loss": 1.9955, "step": 1286 }, { "epoch": 0.04609021075438251, "grad_norm": 1.295050024986267, "learning_rate": 0.0001998644165786163, "loss": 1.8378, "step": 1287 }, { "epoch": 0.0461260228839508, "grad_norm": 1.8528512716293335, "learning_rate": 0.00019986381210829199, "loss": 2.0828, "step": 1288 }, { "epoch": 0.04616183501351908, "grad_norm": 1.5784971714019775, "learning_rate": 0.00019986320629442893, "loss": 1.728, "step": 1289 }, { "epoch": 0.04619764714308736, "grad_norm": 1.469783902168274, "learning_rate": 0.00019986259913703526, "loss": 1.6864, "step": 1290 }, { "epoch": 0.046233459272655646, "grad_norm": 1.7400978803634644, "learning_rate": 0.00019986199063611913, "loss": 1.7075, "step": 1291 }, { "epoch": 0.046269271402223935, "grad_norm": 1.7644522190093994, "learning_rate": 0.0001998613807916887, "loss": 2.161, "step": 1292 }, { "epoch": 0.04630508353179222, "grad_norm": 2.0017952919006348, "learning_rate": 0.00019986076960375223, "loss": 1.9667, "step": 1293 }, { "epoch": 0.0463408956613605, "grad_norm": 1.2968863248825073, "learning_rate": 0.00019986015707231788, "loss": 1.8902, "step": 1294 }, { "epoch": 0.04637670779092879, "grad_norm": 1.572568416595459, "learning_rate": 0.00019985954319739392, "loss": 1.9558, "step": 1295 }, { "epoch": 0.046412519920497074, "grad_norm": 1.356263518333435, "learning_rate": 0.00019985892797898865, "loss": 1.8009, "step": 1296 }, { "epoch": 0.04644833205006536, "grad_norm": 1.2009116411209106, "learning_rate": 0.00019985831141711033, "loss": 1.8879, "step": 1297 }, { "epoch": 0.04648414417963364, "grad_norm": 1.1283270120620728, "learning_rate": 0.00019985769351176723, "loss": 1.7523, "step": 1298 }, { "epoch": 0.04651995630920193, "grad_norm": 1.6041669845581055, "learning_rate": 0.00019985707426296764, "loss": 1.823, "step": 1299 }, { "epoch": 0.04655576843877021, "grad_norm": 1.4035066366195679, "learning_rate": 0.00019985645367071993, "loss": 1.6812, "step": 1300 }, { "epoch": 0.046591580568338495, "grad_norm": 1.4297456741333008, "learning_rate": 0.00019985583173503244, "loss": 1.7875, "step": 1301 }, { "epoch": 0.04662739269790678, "grad_norm": 1.2370003461837769, "learning_rate": 0.00019985520845591356, "loss": 1.7563, "step": 1302 }, { "epoch": 0.04666320482747507, "grad_norm": 1.1710233688354492, "learning_rate": 0.00019985458383337164, "loss": 1.467, "step": 1303 }, { "epoch": 0.04669901695704335, "grad_norm": 1.7941819429397583, "learning_rate": 0.0001998539578674151, "loss": 1.7955, "step": 1304 }, { "epoch": 0.04673482908661163, "grad_norm": 1.6461565494537354, "learning_rate": 0.00019985333055805236, "loss": 1.6701, "step": 1305 }, { "epoch": 0.04677064121617992, "grad_norm": 1.215015172958374, "learning_rate": 0.00019985270190529187, "loss": 1.6302, "step": 1306 }, { "epoch": 0.046806453345748206, "grad_norm": 3.887476921081543, "learning_rate": 0.00019985207190914206, "loss": 1.687, "step": 1307 }, { "epoch": 0.04684226547531649, "grad_norm": 1.9764825105667114, "learning_rate": 0.00019985144056961141, "loss": 2.1641, "step": 1308 }, { "epoch": 0.04687807760488477, "grad_norm": 1.4992644786834717, "learning_rate": 0.00019985080788670847, "loss": 2.0364, "step": 1309 }, { "epoch": 0.04691388973445306, "grad_norm": 2.266002655029297, "learning_rate": 0.00019985017386044167, "loss": 1.8181, "step": 1310 }, { "epoch": 0.046949701864021344, "grad_norm": 2.006157636642456, "learning_rate": 0.00019984953849081958, "loss": 1.9546, "step": 1311 }, { "epoch": 0.04698551399358963, "grad_norm": 1.8313133716583252, "learning_rate": 0.00019984890177785077, "loss": 1.9454, "step": 1312 }, { "epoch": 0.04702132612315792, "grad_norm": 2.395103931427002, "learning_rate": 0.00019984826372154374, "loss": 1.9007, "step": 1313 }, { "epoch": 0.0470571382527262, "grad_norm": 1.6291462182998657, "learning_rate": 0.00019984762432190717, "loss": 1.7289, "step": 1314 }, { "epoch": 0.04709295038229448, "grad_norm": 1.8135216236114502, "learning_rate": 0.00019984698357894957, "loss": 2.0032, "step": 1315 }, { "epoch": 0.047128762511862765, "grad_norm": 1.8101180791854858, "learning_rate": 0.00019984634149267962, "loss": 1.8555, "step": 1316 }, { "epoch": 0.047164574641431055, "grad_norm": 1.441382646560669, "learning_rate": 0.00019984569806310592, "loss": 1.596, "step": 1317 }, { "epoch": 0.04720038677099934, "grad_norm": 1.289527416229248, "learning_rate": 0.00019984505329023717, "loss": 1.6556, "step": 1318 }, { "epoch": 0.04723619890056762, "grad_norm": 2.152682304382324, "learning_rate": 0.000199844407174082, "loss": 2.0037, "step": 1319 }, { "epoch": 0.04727201103013591, "grad_norm": 1.3137688636779785, "learning_rate": 0.00019984375971464913, "loss": 1.4988, "step": 1320 }, { "epoch": 0.04730782315970419, "grad_norm": 2.3812808990478516, "learning_rate": 0.00019984311091194725, "loss": 2.0996, "step": 1321 }, { "epoch": 0.047343635289272476, "grad_norm": 1.7241475582122803, "learning_rate": 0.0001998424607659851, "loss": 1.7122, "step": 1322 }, { "epoch": 0.04737944741884076, "grad_norm": 1.3988949060440063, "learning_rate": 0.00019984180927677146, "loss": 1.6472, "step": 1323 }, { "epoch": 0.04741525954840905, "grad_norm": 1.1787669658660889, "learning_rate": 0.00019984115644431502, "loss": 1.76, "step": 1324 }, { "epoch": 0.04745107167797733, "grad_norm": 2.1474668979644775, "learning_rate": 0.00019984050226862462, "loss": 1.9005, "step": 1325 }, { "epoch": 0.047486883807545614, "grad_norm": 1.143454670906067, "learning_rate": 0.00019983984674970905, "loss": 1.629, "step": 1326 }, { "epoch": 0.0475226959371139, "grad_norm": 1.7645509243011475, "learning_rate": 0.00019983918988757715, "loss": 2.044, "step": 1327 }, { "epoch": 0.04755850806668219, "grad_norm": 1.4978320598602295, "learning_rate": 0.0001998385316822377, "loss": 1.9889, "step": 1328 }, { "epoch": 0.04759432019625047, "grad_norm": 1.7313258647918701, "learning_rate": 0.0001998378721336996, "loss": 2.0302, "step": 1329 }, { "epoch": 0.04763013232581875, "grad_norm": 2.19752836227417, "learning_rate": 0.0001998372112419717, "loss": 2.1565, "step": 1330 }, { "epoch": 0.04766594445538704, "grad_norm": 1.5281023979187012, "learning_rate": 0.00019983654900706293, "loss": 1.7766, "step": 1331 }, { "epoch": 0.047701756584955325, "grad_norm": 1.273146152496338, "learning_rate": 0.00019983588542898218, "loss": 1.3964, "step": 1332 }, { "epoch": 0.04773756871452361, "grad_norm": 1.923797607421875, "learning_rate": 0.00019983522050773833, "loss": 2.0575, "step": 1333 }, { "epoch": 0.04777338084409189, "grad_norm": 2.0104284286499023, "learning_rate": 0.00019983455424334038, "loss": 2.059, "step": 1334 }, { "epoch": 0.04780919297366018, "grad_norm": 1.3276079893112183, "learning_rate": 0.0001998338866357973, "loss": 1.7016, "step": 1335 }, { "epoch": 0.04784500510322846, "grad_norm": 1.1934891939163208, "learning_rate": 0.00019983321768511801, "loss": 1.7879, "step": 1336 }, { "epoch": 0.047880817232796746, "grad_norm": 1.8240382671356201, "learning_rate": 0.00019983254739131158, "loss": 1.9411, "step": 1337 }, { "epoch": 0.047916629362365036, "grad_norm": 1.425972819328308, "learning_rate": 0.000199831875754387, "loss": 1.7801, "step": 1338 }, { "epoch": 0.04795244149193332, "grad_norm": 1.7534042596817017, "learning_rate": 0.00019983120277435333, "loss": 1.6276, "step": 1339 }, { "epoch": 0.0479882536215016, "grad_norm": 1.249691128730774, "learning_rate": 0.00019983052845121954, "loss": 2.0146, "step": 1340 }, { "epoch": 0.048024065751069885, "grad_norm": 1.7718329429626465, "learning_rate": 0.00019982985278499483, "loss": 1.8151, "step": 1341 }, { "epoch": 0.048059877880638174, "grad_norm": 2.18198561668396, "learning_rate": 0.0001998291757756882, "loss": 2.0883, "step": 1342 }, { "epoch": 0.04809569001020646, "grad_norm": 1.7033090591430664, "learning_rate": 0.00019982849742330875, "loss": 1.8902, "step": 1343 }, { "epoch": 0.04813150213977474, "grad_norm": 1.6161448955535889, "learning_rate": 0.00019982781772786564, "loss": 1.9358, "step": 1344 }, { "epoch": 0.04816731426934303, "grad_norm": 2.100522041320801, "learning_rate": 0.00019982713668936805, "loss": 1.798, "step": 1345 }, { "epoch": 0.04820312639891131, "grad_norm": 2.4044883251190186, "learning_rate": 0.00019982645430782506, "loss": 2.3565, "step": 1346 }, { "epoch": 0.048238938528479595, "grad_norm": 1.768221378326416, "learning_rate": 0.00019982577058324589, "loss": 1.9625, "step": 1347 }, { "epoch": 0.04827475065804788, "grad_norm": 1.4462714195251465, "learning_rate": 0.00019982508551563978, "loss": 1.7243, "step": 1348 }, { "epoch": 0.04831056278761617, "grad_norm": 2.0050547122955322, "learning_rate": 0.00019982439910501588, "loss": 1.57, "step": 1349 }, { "epoch": 0.04834637491718445, "grad_norm": 1.5143630504608154, "learning_rate": 0.0001998237113513835, "loss": 1.708, "step": 1350 }, { "epoch": 0.048382187046752734, "grad_norm": 1.6981946229934692, "learning_rate": 0.00019982302225475182, "loss": 2.1118, "step": 1351 }, { "epoch": 0.04841799917632102, "grad_norm": 1.5785638093948364, "learning_rate": 0.0001998223318151301, "loss": 1.6616, "step": 1352 }, { "epoch": 0.048453811305889306, "grad_norm": 1.7291991710662842, "learning_rate": 0.00019982164003252772, "loss": 1.8341, "step": 1353 }, { "epoch": 0.04848962343545759, "grad_norm": 2.4716439247131348, "learning_rate": 0.0001998209469069539, "loss": 2.0056, "step": 1354 }, { "epoch": 0.04852543556502587, "grad_norm": 2.480513334274292, "learning_rate": 0.00019982025243841804, "loss": 2.1853, "step": 1355 }, { "epoch": 0.04856124769459416, "grad_norm": 2.5003104209899902, "learning_rate": 0.00019981955662692942, "loss": 2.2649, "step": 1356 }, { "epoch": 0.048597059824162445, "grad_norm": 1.948498010635376, "learning_rate": 0.00019981885947249742, "loss": 1.8958, "step": 1357 }, { "epoch": 0.04863287195373073, "grad_norm": 1.2755658626556396, "learning_rate": 0.0001998181609751314, "loss": 1.4372, "step": 1358 }, { "epoch": 0.04866868408329901, "grad_norm": 1.3627718687057495, "learning_rate": 0.00019981746113484082, "loss": 1.8786, "step": 1359 }, { "epoch": 0.0487044962128673, "grad_norm": 1.4305596351623535, "learning_rate": 0.00019981675995163505, "loss": 1.7181, "step": 1360 }, { "epoch": 0.04874030834243558, "grad_norm": 1.3786169290542603, "learning_rate": 0.00019981605742552352, "loss": 1.6552, "step": 1361 }, { "epoch": 0.048776120472003866, "grad_norm": 2.100572109222412, "learning_rate": 0.00019981535355651569, "loss": 1.9594, "step": 1362 }, { "epoch": 0.048811932601572156, "grad_norm": 2.110886573791504, "learning_rate": 0.00019981464834462103, "loss": 2.0575, "step": 1363 }, { "epoch": 0.04884774473114044, "grad_norm": 1.4668046236038208, "learning_rate": 0.00019981394178984903, "loss": 1.7259, "step": 1364 }, { "epoch": 0.04888355686070872, "grad_norm": 1.8313469886779785, "learning_rate": 0.0001998132338922092, "loss": 1.9957, "step": 1365 }, { "epoch": 0.048919368990277004, "grad_norm": 1.4437497854232788, "learning_rate": 0.00019981252465171102, "loss": 1.8628, "step": 1366 }, { "epoch": 0.048955181119845294, "grad_norm": 1.769682765007019, "learning_rate": 0.0001998118140683641, "loss": 1.8354, "step": 1367 }, { "epoch": 0.04899099324941358, "grad_norm": 1.4478873014450073, "learning_rate": 0.00019981110214217798, "loss": 1.9215, "step": 1368 }, { "epoch": 0.04902680537898186, "grad_norm": 1.3279414176940918, "learning_rate": 0.00019981038887316221, "loss": 1.6335, "step": 1369 }, { "epoch": 0.04906261750855015, "grad_norm": 1.3693251609802246, "learning_rate": 0.00019980967426132642, "loss": 1.9246, "step": 1370 }, { "epoch": 0.04909842963811843, "grad_norm": 1.9764519929885864, "learning_rate": 0.0001998089583066802, "loss": 1.881, "step": 1371 }, { "epoch": 0.049134241767686715, "grad_norm": 2.154409646987915, "learning_rate": 0.00019980824100923318, "loss": 1.8377, "step": 1372 }, { "epoch": 0.049170053897255, "grad_norm": 1.876036524772644, "learning_rate": 0.00019980752236899502, "loss": 1.976, "step": 1373 }, { "epoch": 0.04920586602682329, "grad_norm": 1.0766030550003052, "learning_rate": 0.00019980680238597542, "loss": 1.7499, "step": 1374 }, { "epoch": 0.04924167815639157, "grad_norm": 1.5491044521331787, "learning_rate": 0.000199806081060184, "loss": 1.9504, "step": 1375 }, { "epoch": 0.04927749028595985, "grad_norm": 1.6977920532226562, "learning_rate": 0.00019980535839163053, "loss": 1.6755, "step": 1376 }, { "epoch": 0.049313302415528136, "grad_norm": 2.227975606918335, "learning_rate": 0.00019980463438032468, "loss": 2.0132, "step": 1377 }, { "epoch": 0.049349114545096426, "grad_norm": 1.4667011499404907, "learning_rate": 0.0001998039090262762, "loss": 1.8883, "step": 1378 }, { "epoch": 0.04938492667466471, "grad_norm": 1.5283125638961792, "learning_rate": 0.0001998031823294949, "loss": 1.8843, "step": 1379 }, { "epoch": 0.04942073880423299, "grad_norm": 1.5257043838500977, "learning_rate": 0.0001998024542899905, "loss": 1.8381, "step": 1380 }, { "epoch": 0.04945655093380128, "grad_norm": 1.9778072834014893, "learning_rate": 0.00019980172490777283, "loss": 1.9028, "step": 1381 }, { "epoch": 0.049492363063369564, "grad_norm": 1.3122044801712036, "learning_rate": 0.00019980099418285166, "loss": 1.6511, "step": 1382 }, { "epoch": 0.04952817519293785, "grad_norm": 1.8901945352554321, "learning_rate": 0.00019980026211523686, "loss": 1.6198, "step": 1383 }, { "epoch": 0.04956398732250613, "grad_norm": 1.79074227809906, "learning_rate": 0.00019979952870493824, "loss": 1.8086, "step": 1384 }, { "epoch": 0.04959979945207442, "grad_norm": 1.6266603469848633, "learning_rate": 0.00019979879395196575, "loss": 1.6447, "step": 1385 }, { "epoch": 0.0496356115816427, "grad_norm": 1.4488394260406494, "learning_rate": 0.00019979805785632916, "loss": 2.0551, "step": 1386 }, { "epoch": 0.049671423711210985, "grad_norm": 3.32766056060791, "learning_rate": 0.00019979732041803847, "loss": 2.1713, "step": 1387 }, { "epoch": 0.049707235840779275, "grad_norm": 1.405636191368103, "learning_rate": 0.00019979658163710355, "loss": 1.6463, "step": 1388 }, { "epoch": 0.04974304797034756, "grad_norm": 1.9788293838500977, "learning_rate": 0.00019979584151353437, "loss": 1.9736, "step": 1389 }, { "epoch": 0.04977886009991584, "grad_norm": 2.140531539916992, "learning_rate": 0.00019979510004734083, "loss": 1.9015, "step": 1390 }, { "epoch": 0.049814672229484123, "grad_norm": 2.0656092166900635, "learning_rate": 0.00019979435723853296, "loss": 1.8466, "step": 1391 }, { "epoch": 0.04985048435905241, "grad_norm": 1.4488940238952637, "learning_rate": 0.00019979361308712073, "loss": 1.7686, "step": 1392 }, { "epoch": 0.049886296488620696, "grad_norm": 2.0686659812927246, "learning_rate": 0.00019979286759311423, "loss": 1.8533, "step": 1393 }, { "epoch": 0.04992210861818898, "grad_norm": 2.168595314025879, "learning_rate": 0.00019979212075652334, "loss": 2.1408, "step": 1394 }, { "epoch": 0.04995792074775727, "grad_norm": 1.4077308177947998, "learning_rate": 0.00019979137257735823, "loss": 1.8534, "step": 1395 }, { "epoch": 0.04999373287732555, "grad_norm": 1.5167765617370605, "learning_rate": 0.0001997906230556289, "loss": 1.7809, "step": 1396 }, { "epoch": 0.050029545006893834, "grad_norm": 2.342071771621704, "learning_rate": 0.00019978987219134545, "loss": 1.8212, "step": 1397 }, { "epoch": 0.05006535713646212, "grad_norm": 1.417253851890564, "learning_rate": 0.000199789119984518, "loss": 2.1048, "step": 1398 }, { "epoch": 0.05010116926603041, "grad_norm": 1.8908305168151855, "learning_rate": 0.0001997883664351567, "loss": 1.951, "step": 1399 }, { "epoch": 0.05013698139559869, "grad_norm": 2.8274893760681152, "learning_rate": 0.00019978761154327158, "loss": 2.3188, "step": 1400 }, { "epoch": 0.05017279352516697, "grad_norm": 1.62851083278656, "learning_rate": 0.0001997868553088729, "loss": 1.9725, "step": 1401 }, { "epoch": 0.050208605654735255, "grad_norm": 2.5111052989959717, "learning_rate": 0.00019978609773197082, "loss": 2.2518, "step": 1402 }, { "epoch": 0.050244417784303545, "grad_norm": 1.185157299041748, "learning_rate": 0.00019978533881257547, "loss": 1.675, "step": 1403 }, { "epoch": 0.05028022991387183, "grad_norm": 1.112566590309143, "learning_rate": 0.0001997845785506971, "loss": 1.7595, "step": 1404 }, { "epoch": 0.05031604204344011, "grad_norm": 1.807395339012146, "learning_rate": 0.00019978381694634595, "loss": 1.9387, "step": 1405 }, { "epoch": 0.0503518541730084, "grad_norm": 1.576179027557373, "learning_rate": 0.00019978305399953228, "loss": 1.6791, "step": 1406 }, { "epoch": 0.050387666302576684, "grad_norm": 1.4238735437393188, "learning_rate": 0.0001997822897102663, "loss": 1.8154, "step": 1407 }, { "epoch": 0.050423478432144966, "grad_norm": 1.8287601470947266, "learning_rate": 0.00019978152407855833, "loss": 1.8966, "step": 1408 }, { "epoch": 0.05045929056171325, "grad_norm": 1.644075632095337, "learning_rate": 0.00019978075710441867, "loss": 1.8428, "step": 1409 }, { "epoch": 0.05049510269128154, "grad_norm": 1.835912823677063, "learning_rate": 0.0001997799887878576, "loss": 1.8366, "step": 1410 }, { "epoch": 0.05053091482084982, "grad_norm": 1.9325989484786987, "learning_rate": 0.0001997792191288855, "loss": 1.534, "step": 1411 }, { "epoch": 0.050566726950418105, "grad_norm": 1.7003639936447144, "learning_rate": 0.00019977844812751273, "loss": 1.8233, "step": 1412 }, { "epoch": 0.050602539079986394, "grad_norm": 1.4617531299591064, "learning_rate": 0.00019977767578374965, "loss": 1.8938, "step": 1413 }, { "epoch": 0.05063835120955468, "grad_norm": 1.3252395391464233, "learning_rate": 0.0001997769020976066, "loss": 1.9042, "step": 1414 }, { "epoch": 0.05067416333912296, "grad_norm": 1.881447672843933, "learning_rate": 0.0001997761270690941, "loss": 1.8318, "step": 1415 }, { "epoch": 0.05070997546869124, "grad_norm": 2.197857618331909, "learning_rate": 0.00019977535069822246, "loss": 1.7475, "step": 1416 }, { "epoch": 0.05074578759825953, "grad_norm": 1.581891655921936, "learning_rate": 0.0001997745729850022, "loss": 1.7239, "step": 1417 }, { "epoch": 0.050781599727827816, "grad_norm": 1.7551162242889404, "learning_rate": 0.00019977379392944377, "loss": 1.8272, "step": 1418 }, { "epoch": 0.0508174118573961, "grad_norm": 1.9000366926193237, "learning_rate": 0.00019977301353155764, "loss": 2.1745, "step": 1419 }, { "epoch": 0.05085322398696439, "grad_norm": 1.7322187423706055, "learning_rate": 0.00019977223179135428, "loss": 1.9419, "step": 1420 }, { "epoch": 0.05088903611653267, "grad_norm": 1.9449477195739746, "learning_rate": 0.0001997714487088443, "loss": 1.7893, "step": 1421 }, { "epoch": 0.050924848246100954, "grad_norm": 1.3871630430221558, "learning_rate": 0.0001997706642840381, "loss": 1.9459, "step": 1422 }, { "epoch": 0.05096066037566924, "grad_norm": 1.2880792617797852, "learning_rate": 0.00019976987851694634, "loss": 1.7042, "step": 1423 }, { "epoch": 0.050996472505237526, "grad_norm": 2.2730605602264404, "learning_rate": 0.00019976909140757956, "loss": 1.744, "step": 1424 }, { "epoch": 0.05103228463480581, "grad_norm": 2.3292646408081055, "learning_rate": 0.00019976830295594832, "loss": 2.1704, "step": 1425 }, { "epoch": 0.05106809676437409, "grad_norm": 2.2148680686950684, "learning_rate": 0.0001997675131620633, "loss": 1.7412, "step": 1426 }, { "epoch": 0.051103908893942375, "grad_norm": 2.116534471511841, "learning_rate": 0.00019976672202593506, "loss": 1.7296, "step": 1427 }, { "epoch": 0.051139721023510665, "grad_norm": 1.278947114944458, "learning_rate": 0.00019976592954757427, "loss": 1.4632, "step": 1428 }, { "epoch": 0.05117553315307895, "grad_norm": 1.4900505542755127, "learning_rate": 0.00019976513572699157, "loss": 1.8935, "step": 1429 }, { "epoch": 0.05121134528264723, "grad_norm": 1.7054023742675781, "learning_rate": 0.00019976434056419767, "loss": 1.681, "step": 1430 }, { "epoch": 0.05124715741221552, "grad_norm": 2.0350306034088135, "learning_rate": 0.00019976354405920328, "loss": 1.8399, "step": 1431 }, { "epoch": 0.0512829695417838, "grad_norm": 1.9636918306350708, "learning_rate": 0.00019976274621201907, "loss": 2.0888, "step": 1432 }, { "epoch": 0.051318781671352086, "grad_norm": 1.9971915483474731, "learning_rate": 0.00019976194702265578, "loss": 1.7744, "step": 1433 }, { "epoch": 0.05135459380092037, "grad_norm": 2.1631476879119873, "learning_rate": 0.00019976114649112418, "loss": 2.0092, "step": 1434 }, { "epoch": 0.05139040593048866, "grad_norm": 3.263909339904785, "learning_rate": 0.00019976034461743504, "loss": 2.3351, "step": 1435 }, { "epoch": 0.05142621806005694, "grad_norm": 1.6741386651992798, "learning_rate": 0.00019975954140159915, "loss": 1.7882, "step": 1436 }, { "epoch": 0.051462030189625224, "grad_norm": 1.8211408853530884, "learning_rate": 0.0001997587368436273, "loss": 1.6995, "step": 1437 }, { "epoch": 0.051497842319193514, "grad_norm": 1.880556344985962, "learning_rate": 0.00019975793094353036, "loss": 2.0151, "step": 1438 }, { "epoch": 0.0515336544487618, "grad_norm": 1.701261281967163, "learning_rate": 0.0001997571237013191, "loss": 1.6038, "step": 1439 }, { "epoch": 0.05156946657833008, "grad_norm": 1.4975484609603882, "learning_rate": 0.00019975631511700442, "loss": 1.5615, "step": 1440 }, { "epoch": 0.05160527870789836, "grad_norm": 1.6058564186096191, "learning_rate": 0.00019975550519059723, "loss": 2.0746, "step": 1441 }, { "epoch": 0.05164109083746665, "grad_norm": 1.5407977104187012, "learning_rate": 0.00019975469392210834, "loss": 1.893, "step": 1442 }, { "epoch": 0.051676902967034935, "grad_norm": 2.4655606746673584, "learning_rate": 0.00019975388131154875, "loss": 2.4228, "step": 1443 }, { "epoch": 0.05171271509660322, "grad_norm": 2.1224660873413086, "learning_rate": 0.00019975306735892936, "loss": 1.84, "step": 1444 }, { "epoch": 0.05174852722617151, "grad_norm": 1.920393943786621, "learning_rate": 0.00019975225206426113, "loss": 1.8256, "step": 1445 }, { "epoch": 0.05178433935573979, "grad_norm": 1.4037801027297974, "learning_rate": 0.000199751435427555, "loss": 1.6021, "step": 1446 }, { "epoch": 0.05182015148530807, "grad_norm": 2.118389844894409, "learning_rate": 0.000199750617448822, "loss": 1.7848, "step": 1447 }, { "epoch": 0.051855963614876356, "grad_norm": 1.7762823104858398, "learning_rate": 0.0001997497981280731, "loss": 1.8298, "step": 1448 }, { "epoch": 0.051891775744444646, "grad_norm": 2.352095127105713, "learning_rate": 0.0001997489774653193, "loss": 1.9828, "step": 1449 }, { "epoch": 0.05192758787401293, "grad_norm": 3.4863343238830566, "learning_rate": 0.00019974815546057172, "loss": 2.2459, "step": 1450 }, { "epoch": 0.05196340000358121, "grad_norm": 2.1996467113494873, "learning_rate": 0.00019974733211384135, "loss": 1.7053, "step": 1451 }, { "epoch": 0.051999212133149494, "grad_norm": 1.548524022102356, "learning_rate": 0.0001997465074251393, "loss": 1.6912, "step": 1452 }, { "epoch": 0.052035024262717784, "grad_norm": 1.4116077423095703, "learning_rate": 0.00019974568139447666, "loss": 1.6227, "step": 1453 }, { "epoch": 0.05207083639228607, "grad_norm": 1.6894891262054443, "learning_rate": 0.00019974485402186453, "loss": 1.599, "step": 1454 }, { "epoch": 0.05210664852185435, "grad_norm": 2.850682497024536, "learning_rate": 0.00019974402530731407, "loss": 1.929, "step": 1455 }, { "epoch": 0.05214246065142264, "grad_norm": 2.964691400527954, "learning_rate": 0.0001997431952508364, "loss": 1.4999, "step": 1456 }, { "epoch": 0.05217827278099092, "grad_norm": 2.3899643421173096, "learning_rate": 0.00019974236385244268, "loss": 1.8135, "step": 1457 }, { "epoch": 0.052214084910559205, "grad_norm": 3.404496431350708, "learning_rate": 0.00019974153111214414, "loss": 2.2774, "step": 1458 }, { "epoch": 0.05224989704012749, "grad_norm": 1.3085155487060547, "learning_rate": 0.00019974069702995194, "loss": 1.7506, "step": 1459 }, { "epoch": 0.05228570916969578, "grad_norm": 1.9195306301116943, "learning_rate": 0.00019973986160587732, "loss": 2.0551, "step": 1460 }, { "epoch": 0.05232152129926406, "grad_norm": 1.2769794464111328, "learning_rate": 0.0001997390248399315, "loss": 1.6659, "step": 1461 }, { "epoch": 0.052357333428832344, "grad_norm": 1.5254219770431519, "learning_rate": 0.00019973818673212578, "loss": 1.3987, "step": 1462 }, { "epoch": 0.05239314555840063, "grad_norm": 1.952865481376648, "learning_rate": 0.00019973734728247143, "loss": 1.6659, "step": 1463 }, { "epoch": 0.052428957687968916, "grad_norm": 1.9222220182418823, "learning_rate": 0.0001997365064909797, "loss": 1.6806, "step": 1464 }, { "epoch": 0.0524647698175372, "grad_norm": 1.1028681993484497, "learning_rate": 0.0001997356643576619, "loss": 1.8794, "step": 1465 }, { "epoch": 0.05250058194710548, "grad_norm": 1.5954684019088745, "learning_rate": 0.00019973482088252943, "loss": 1.6585, "step": 1466 }, { "epoch": 0.05253639407667377, "grad_norm": 2.3468151092529297, "learning_rate": 0.00019973397606559354, "loss": 1.9015, "step": 1467 }, { "epoch": 0.052572206206242054, "grad_norm": 1.6654571294784546, "learning_rate": 0.0001997331299068657, "loss": 1.9589, "step": 1468 }, { "epoch": 0.05260801833581034, "grad_norm": 1.9264411926269531, "learning_rate": 0.00019973228240635722, "loss": 1.9634, "step": 1469 }, { "epoch": 0.05264383046537863, "grad_norm": 1.5382975339889526, "learning_rate": 0.00019973143356407952, "loss": 1.7071, "step": 1470 }, { "epoch": 0.05267964259494691, "grad_norm": 1.8750600814819336, "learning_rate": 0.00019973058338004407, "loss": 1.8874, "step": 1471 }, { "epoch": 0.05271545472451519, "grad_norm": 2.4676332473754883, "learning_rate": 0.00019972973185426222, "loss": 2.0747, "step": 1472 }, { "epoch": 0.052751266854083476, "grad_norm": 1.5208827257156372, "learning_rate": 0.0001997288789867455, "loss": 1.8408, "step": 1473 }, { "epoch": 0.052787078983651765, "grad_norm": 2.3117616176605225, "learning_rate": 0.0001997280247775053, "loss": 1.6432, "step": 1474 }, { "epoch": 0.05282289111322005, "grad_norm": 1.715316653251648, "learning_rate": 0.0001997271692265532, "loss": 1.494, "step": 1475 }, { "epoch": 0.05285870324278833, "grad_norm": 1.7642710208892822, "learning_rate": 0.0001997263123339007, "loss": 1.6121, "step": 1476 }, { "epoch": 0.052894515372356614, "grad_norm": 1.6220943927764893, "learning_rate": 0.00019972545409955927, "loss": 1.4739, "step": 1477 }, { "epoch": 0.052930327501924904, "grad_norm": 1.6621453762054443, "learning_rate": 0.0001997245945235405, "loss": 2.0465, "step": 1478 }, { "epoch": 0.052966139631493186, "grad_norm": 1.6994413137435913, "learning_rate": 0.00019972373360585598, "loss": 1.8201, "step": 1479 }, { "epoch": 0.05300195176106147, "grad_norm": 1.6507011651992798, "learning_rate": 0.0001997228713465172, "loss": 1.7818, "step": 1480 }, { "epoch": 0.05303776389062976, "grad_norm": 2.683297872543335, "learning_rate": 0.00019972200774553587, "loss": 2.1151, "step": 1481 }, { "epoch": 0.05307357602019804, "grad_norm": 1.821467399597168, "learning_rate": 0.00019972114280292355, "loss": 1.759, "step": 1482 }, { "epoch": 0.053109388149766325, "grad_norm": 1.6540615558624268, "learning_rate": 0.00019972027651869186, "loss": 1.6425, "step": 1483 }, { "epoch": 0.05314520027933461, "grad_norm": 1.185801386833191, "learning_rate": 0.0001997194088928525, "loss": 1.598, "step": 1484 }, { "epoch": 0.0531810124089029, "grad_norm": 1.5758860111236572, "learning_rate": 0.0001997185399254171, "loss": 1.8597, "step": 1485 }, { "epoch": 0.05321682453847118, "grad_norm": 1.5715807676315308, "learning_rate": 0.00019971766961639738, "loss": 1.6007, "step": 1486 }, { "epoch": 0.05325263666803946, "grad_norm": 2.0418407917022705, "learning_rate": 0.00019971679796580504, "loss": 2.1591, "step": 1487 }, { "epoch": 0.05328844879760775, "grad_norm": 1.2468957901000977, "learning_rate": 0.00019971592497365184, "loss": 1.6722, "step": 1488 }, { "epoch": 0.053324260927176036, "grad_norm": 1.5520515441894531, "learning_rate": 0.00019971505063994948, "loss": 1.7877, "step": 1489 }, { "epoch": 0.05336007305674432, "grad_norm": 1.990512728691101, "learning_rate": 0.0001997141749647097, "loss": 1.8976, "step": 1490 }, { "epoch": 0.0533958851863126, "grad_norm": 1.7781422138214111, "learning_rate": 0.00019971329794794436, "loss": 2.0116, "step": 1491 }, { "epoch": 0.05343169731588089, "grad_norm": 1.6702563762664795, "learning_rate": 0.0001997124195896652, "loss": 1.7436, "step": 1492 }, { "epoch": 0.053467509445449174, "grad_norm": 2.4951159954071045, "learning_rate": 0.00019971153988988406, "loss": 1.7853, "step": 1493 }, { "epoch": 0.05350332157501746, "grad_norm": 1.4511957168579102, "learning_rate": 0.00019971065884861276, "loss": 1.6299, "step": 1494 }, { "epoch": 0.05353913370458575, "grad_norm": 1.593092679977417, "learning_rate": 0.00019970977646586319, "loss": 2.1281, "step": 1495 }, { "epoch": 0.05357494583415403, "grad_norm": 2.2723426818847656, "learning_rate": 0.00019970889274164715, "loss": 1.4845, "step": 1496 }, { "epoch": 0.05361075796372231, "grad_norm": 2.1010289192199707, "learning_rate": 0.00019970800767597663, "loss": 1.995, "step": 1497 }, { "epoch": 0.053646570093290595, "grad_norm": 1.3042644262313843, "learning_rate": 0.00019970712126886342, "loss": 2.1141, "step": 1498 }, { "epoch": 0.053682382222858885, "grad_norm": 1.9947757720947266, "learning_rate": 0.00019970623352031952, "loss": 1.8331, "step": 1499 }, { "epoch": 0.05371819435242717, "grad_norm": 2.3290586471557617, "learning_rate": 0.00019970534443035688, "loss": 1.6654, "step": 1500 }, { "epoch": 0.05375400648199545, "grad_norm": 3.0495433807373047, "learning_rate": 0.00019970445399898745, "loss": 1.8413, "step": 1501 }, { "epoch": 0.05378981861156373, "grad_norm": 2.522512435913086, "learning_rate": 0.0001997035622262232, "loss": 1.8241, "step": 1502 }, { "epoch": 0.05382563074113202, "grad_norm": 2.1961100101470947, "learning_rate": 0.00019970266911207608, "loss": 1.6946, "step": 1503 }, { "epoch": 0.053861442870700306, "grad_norm": 1.8775655031204224, "learning_rate": 0.00019970177465655818, "loss": 1.8042, "step": 1504 }, { "epoch": 0.05389725500026859, "grad_norm": 3.8398947715759277, "learning_rate": 0.00019970087885968154, "loss": 2.0227, "step": 1505 }, { "epoch": 0.05393306712983688, "grad_norm": 1.3387746810913086, "learning_rate": 0.00019969998172145815, "loss": 1.6747, "step": 1506 }, { "epoch": 0.05396887925940516, "grad_norm": 1.517892599105835, "learning_rate": 0.00019969908324190012, "loss": 1.8804, "step": 1507 }, { "epoch": 0.054004691388973444, "grad_norm": 1.1590577363967896, "learning_rate": 0.0001996981834210195, "loss": 1.6391, "step": 1508 }, { "epoch": 0.05404050351854173, "grad_norm": 2.3728671073913574, "learning_rate": 0.00019969728225882846, "loss": 1.6555, "step": 1509 }, { "epoch": 0.05407631564811002, "grad_norm": 1.6440011262893677, "learning_rate": 0.0001996963797553391, "loss": 1.7857, "step": 1510 }, { "epoch": 0.0541121277776783, "grad_norm": 1.4567910432815552, "learning_rate": 0.0001996954759105635, "loss": 1.9391, "step": 1511 }, { "epoch": 0.05414793990724658, "grad_norm": 1.946500539779663, "learning_rate": 0.00019969457072451392, "loss": 1.7353, "step": 1512 }, { "epoch": 0.05418375203681487, "grad_norm": 2.7675740718841553, "learning_rate": 0.00019969366419720245, "loss": 2.133, "step": 1513 }, { "epoch": 0.054219564166383155, "grad_norm": 2.4415030479431152, "learning_rate": 0.00019969275632864133, "loss": 1.9024, "step": 1514 }, { "epoch": 0.05425537629595144, "grad_norm": 1.4757903814315796, "learning_rate": 0.0001996918471188428, "loss": 1.7258, "step": 1515 }, { "epoch": 0.05429118842551972, "grad_norm": 1.7123427391052246, "learning_rate": 0.00019969093656781902, "loss": 1.7771, "step": 1516 }, { "epoch": 0.05432700055508801, "grad_norm": 1.3325016498565674, "learning_rate": 0.00019969002467558228, "loss": 1.7978, "step": 1517 }, { "epoch": 0.05436281268465629, "grad_norm": 1.9106487035751343, "learning_rate": 0.00019968911144214486, "loss": 1.8826, "step": 1518 }, { "epoch": 0.054398624814224576, "grad_norm": 2.213944435119629, "learning_rate": 0.00019968819686751906, "loss": 1.9959, "step": 1519 }, { "epoch": 0.054434436943792866, "grad_norm": 2.123725175857544, "learning_rate": 0.00019968728095171715, "loss": 2.1413, "step": 1520 }, { "epoch": 0.05447024907336115, "grad_norm": 1.2367082834243774, "learning_rate": 0.00019968636369475142, "loss": 1.6893, "step": 1521 }, { "epoch": 0.05450606120292943, "grad_norm": 1.6155388355255127, "learning_rate": 0.00019968544509663428, "loss": 1.9238, "step": 1522 }, { "epoch": 0.054541873332497715, "grad_norm": 2.1532177925109863, "learning_rate": 0.00019968452515737805, "loss": 1.8312, "step": 1523 }, { "epoch": 0.054577685462066004, "grad_norm": 2.00732421875, "learning_rate": 0.00019968360387699513, "loss": 1.8616, "step": 1524 }, { "epoch": 0.05461349759163429, "grad_norm": 2.6617205142974854, "learning_rate": 0.00019968268125549794, "loss": 1.9007, "step": 1525 }, { "epoch": 0.05464930972120257, "grad_norm": 1.5837981700897217, "learning_rate": 0.0001996817572928988, "loss": 1.8432, "step": 1526 }, { "epoch": 0.05468512185077085, "grad_norm": 1.387168526649475, "learning_rate": 0.0001996808319892102, "loss": 1.7933, "step": 1527 }, { "epoch": 0.05472093398033914, "grad_norm": 1.664420247077942, "learning_rate": 0.00019967990534444462, "loss": 1.8014, "step": 1528 }, { "epoch": 0.054756746109907425, "grad_norm": 1.184078574180603, "learning_rate": 0.00019967897735861446, "loss": 1.451, "step": 1529 }, { "epoch": 0.05479255823947571, "grad_norm": 2.2369894981384277, "learning_rate": 0.00019967804803173227, "loss": 1.5848, "step": 1530 }, { "epoch": 0.054828370369044, "grad_norm": 1.4040497541427612, "learning_rate": 0.00019967711736381048, "loss": 1.8061, "step": 1531 }, { "epoch": 0.05486418249861228, "grad_norm": 2.491568088531494, "learning_rate": 0.00019967618535486164, "loss": 1.9725, "step": 1532 }, { "epoch": 0.054899994628180564, "grad_norm": 1.9477348327636719, "learning_rate": 0.00019967525200489833, "loss": 1.6782, "step": 1533 }, { "epoch": 0.054935806757748847, "grad_norm": 1.5153226852416992, "learning_rate": 0.00019967431731393308, "loss": 1.8764, "step": 1534 }, { "epoch": 0.054971618887317136, "grad_norm": 2.029557704925537, "learning_rate": 0.00019967338128197847, "loss": 1.8136, "step": 1535 }, { "epoch": 0.05500743101688542, "grad_norm": 2.0749216079711914, "learning_rate": 0.00019967244390904708, "loss": 1.9191, "step": 1536 }, { "epoch": 0.0550432431464537, "grad_norm": 1.8724786043167114, "learning_rate": 0.0001996715051951515, "loss": 1.7789, "step": 1537 }, { "epoch": 0.05507905527602199, "grad_norm": 1.8553539514541626, "learning_rate": 0.0001996705651403044, "loss": 1.9742, "step": 1538 }, { "epoch": 0.055114867405590275, "grad_norm": 1.4529833793640137, "learning_rate": 0.0001996696237445184, "loss": 1.5149, "step": 1539 }, { "epoch": 0.05515067953515856, "grad_norm": 1.6312896013259888, "learning_rate": 0.0001996686810078062, "loss": 1.8811, "step": 1540 }, { "epoch": 0.05518649166472684, "grad_norm": 2.4548962116241455, "learning_rate": 0.00019966773693018045, "loss": 1.9498, "step": 1541 }, { "epoch": 0.05522230379429513, "grad_norm": 1.8022762537002563, "learning_rate": 0.00019966679151165384, "loss": 1.9692, "step": 1542 }, { "epoch": 0.05525811592386341, "grad_norm": 2.094322919845581, "learning_rate": 0.00019966584475223913, "loss": 1.7657, "step": 1543 }, { "epoch": 0.055293928053431696, "grad_norm": 1.8352880477905273, "learning_rate": 0.00019966489665194904, "loss": 1.7019, "step": 1544 }, { "epoch": 0.055329740182999985, "grad_norm": 3.184751033782959, "learning_rate": 0.0001996639472107963, "loss": 2.2141, "step": 1545 }, { "epoch": 0.05536555231256827, "grad_norm": 1.6981626749038696, "learning_rate": 0.00019966299642879375, "loss": 1.7203, "step": 1546 }, { "epoch": 0.05540136444213655, "grad_norm": 1.5179368257522583, "learning_rate": 0.00019966204430595412, "loss": 1.907, "step": 1547 }, { "epoch": 0.055437176571704834, "grad_norm": 3.7298059463500977, "learning_rate": 0.00019966109084229024, "loss": 1.9523, "step": 1548 }, { "epoch": 0.055472988701273124, "grad_norm": 2.229259729385376, "learning_rate": 0.00019966013603781493, "loss": 1.9006, "step": 1549 }, { "epoch": 0.05550880083084141, "grad_norm": 1.454759120941162, "learning_rate": 0.00019965917989254103, "loss": 1.7634, "step": 1550 }, { "epoch": 0.05554461296040969, "grad_norm": 1.9848576784133911, "learning_rate": 0.00019965822240648143, "loss": 2.0198, "step": 1551 }, { "epoch": 0.05558042508997797, "grad_norm": 1.571907877922058, "learning_rate": 0.00019965726357964902, "loss": 1.6818, "step": 1552 }, { "epoch": 0.05561623721954626, "grad_norm": 2.924750328063965, "learning_rate": 0.00019965630341205664, "loss": 1.8103, "step": 1553 }, { "epoch": 0.055652049349114545, "grad_norm": 1.6184571981430054, "learning_rate": 0.00019965534190371725, "loss": 1.8719, "step": 1554 }, { "epoch": 0.05568786147868283, "grad_norm": 1.6031067371368408, "learning_rate": 0.0001996543790546438, "loss": 2.0676, "step": 1555 }, { "epoch": 0.05572367360825112, "grad_norm": 2.796915054321289, "learning_rate": 0.00019965341486484923, "loss": 1.7383, "step": 1556 }, { "epoch": 0.0557594857378194, "grad_norm": 3.0277481079101562, "learning_rate": 0.00019965244933434648, "loss": 1.8378, "step": 1557 }, { "epoch": 0.05579529786738768, "grad_norm": 1.5920354127883911, "learning_rate": 0.00019965148246314858, "loss": 1.9057, "step": 1558 }, { "epoch": 0.055831109996955966, "grad_norm": 2.056974172592163, "learning_rate": 0.00019965051425126852, "loss": 1.6761, "step": 1559 }, { "epoch": 0.055866922126524256, "grad_norm": 1.310811161994934, "learning_rate": 0.00019964954469871936, "loss": 1.5442, "step": 1560 }, { "epoch": 0.05590273425609254, "grad_norm": 2.0601959228515625, "learning_rate": 0.0001996485738055141, "loss": 1.8756, "step": 1561 }, { "epoch": 0.05593854638566082, "grad_norm": 1.5039955377578735, "learning_rate": 0.00019964760157166578, "loss": 1.9241, "step": 1562 }, { "epoch": 0.05597435851522911, "grad_norm": 2.9783098697662354, "learning_rate": 0.00019964662799718753, "loss": 1.995, "step": 1563 }, { "epoch": 0.056010170644797394, "grad_norm": 2.375992774963379, "learning_rate": 0.00019964565308209248, "loss": 2.1893, "step": 1564 }, { "epoch": 0.05604598277436568, "grad_norm": 1.5130183696746826, "learning_rate": 0.00019964467682639364, "loss": 1.5716, "step": 1565 }, { "epoch": 0.05608179490393396, "grad_norm": 1.9226343631744385, "learning_rate": 0.00019964369923010424, "loss": 1.9059, "step": 1566 }, { "epoch": 0.05611760703350225, "grad_norm": 3.1932523250579834, "learning_rate": 0.00019964272029323742, "loss": 2.0947, "step": 1567 }, { "epoch": 0.05615341916307053, "grad_norm": 1.9467480182647705, "learning_rate": 0.00019964174001580628, "loss": 2.0824, "step": 1568 }, { "epoch": 0.056189231292638815, "grad_norm": 1.4615256786346436, "learning_rate": 0.00019964075839782407, "loss": 1.7063, "step": 1569 }, { "epoch": 0.056225043422207105, "grad_norm": 2.083310604095459, "learning_rate": 0.000199639775439304, "loss": 2.1004, "step": 1570 }, { "epoch": 0.05626085555177539, "grad_norm": 2.23530650138855, "learning_rate": 0.00019963879114025926, "loss": 1.7032, "step": 1571 }, { "epoch": 0.05629666768134367, "grad_norm": 1.425076961517334, "learning_rate": 0.00019963780550070315, "loss": 1.7797, "step": 1572 }, { "epoch": 0.05633247981091195, "grad_norm": 2.6845715045928955, "learning_rate": 0.00019963681852064883, "loss": 1.8535, "step": 1573 }, { "epoch": 0.05636829194048024, "grad_norm": 2.078648567199707, "learning_rate": 0.0001996358302001097, "loss": 1.6292, "step": 1574 }, { "epoch": 0.056404104070048526, "grad_norm": 2.747649908065796, "learning_rate": 0.00019963484053909896, "loss": 1.877, "step": 1575 }, { "epoch": 0.05643991619961681, "grad_norm": 2.103194236755371, "learning_rate": 0.00019963384953762995, "loss": 1.8571, "step": 1576 }, { "epoch": 0.05647572832918509, "grad_norm": 1.530626654624939, "learning_rate": 0.00019963285719571604, "loss": 1.5274, "step": 1577 }, { "epoch": 0.05651154045875338, "grad_norm": 1.7835010290145874, "learning_rate": 0.00019963186351337054, "loss": 1.678, "step": 1578 }, { "epoch": 0.056547352588321664, "grad_norm": 1.6494067907333374, "learning_rate": 0.00019963086849060684, "loss": 1.6927, "step": 1579 }, { "epoch": 0.05658316471788995, "grad_norm": 2.0887563228607178, "learning_rate": 0.0001996298721274383, "loss": 1.8169, "step": 1580 }, { "epoch": 0.05661897684745824, "grad_norm": 1.8033913373947144, "learning_rate": 0.00019962887442387834, "loss": 1.7722, "step": 1581 }, { "epoch": 0.05665478897702652, "grad_norm": 2.5214874744415283, "learning_rate": 0.0001996278753799404, "loss": 1.7613, "step": 1582 }, { "epoch": 0.0566906011065948, "grad_norm": 1.961786150932312, "learning_rate": 0.00019962687499563793, "loss": 1.8367, "step": 1583 }, { "epoch": 0.056726413236163085, "grad_norm": 2.0807454586029053, "learning_rate": 0.00019962587327098435, "loss": 1.7686, "step": 1584 }, { "epoch": 0.056762225365731375, "grad_norm": 1.7898211479187012, "learning_rate": 0.00019962487020599315, "loss": 1.2548, "step": 1585 }, { "epoch": 0.05679803749529966, "grad_norm": 1.7042467594146729, "learning_rate": 0.00019962386580067782, "loss": 1.6725, "step": 1586 }, { "epoch": 0.05683384962486794, "grad_norm": 1.3166635036468506, "learning_rate": 0.00019962286005505188, "loss": 1.9565, "step": 1587 }, { "epoch": 0.05686966175443623, "grad_norm": 1.8134676218032837, "learning_rate": 0.00019962185296912887, "loss": 1.7871, "step": 1588 }, { "epoch": 0.056905473884004514, "grad_norm": 1.3248748779296875, "learning_rate": 0.00019962084454292235, "loss": 1.7441, "step": 1589 }, { "epoch": 0.056941286013572796, "grad_norm": 2.7689707279205322, "learning_rate": 0.00019961983477644583, "loss": 1.7567, "step": 1590 }, { "epoch": 0.05697709814314108, "grad_norm": 1.4286848306655884, "learning_rate": 0.00019961882366971296, "loss": 1.5907, "step": 1591 }, { "epoch": 0.05701291027270937, "grad_norm": 1.531070590019226, "learning_rate": 0.00019961781122273734, "loss": 1.8205, "step": 1592 }, { "epoch": 0.05704872240227765, "grad_norm": 3.7116222381591797, "learning_rate": 0.00019961679743553252, "loss": 1.7679, "step": 1593 }, { "epoch": 0.057084534531845935, "grad_norm": 1.6121838092803955, "learning_rate": 0.0001996157823081122, "loss": 1.728, "step": 1594 }, { "epoch": 0.057120346661414224, "grad_norm": 1.6923259496688843, "learning_rate": 0.00019961476584049004, "loss": 1.5902, "step": 1595 }, { "epoch": 0.05715615879098251, "grad_norm": 1.7286845445632935, "learning_rate": 0.00019961374803267968, "loss": 1.8698, "step": 1596 }, { "epoch": 0.05719197092055079, "grad_norm": 1.6534048318862915, "learning_rate": 0.00019961272888469484, "loss": 1.8303, "step": 1597 }, { "epoch": 0.05722778305011907, "grad_norm": 2.573026180267334, "learning_rate": 0.00019961170839654922, "loss": 1.8427, "step": 1598 }, { "epoch": 0.05726359517968736, "grad_norm": 1.91075599193573, "learning_rate": 0.00019961068656825656, "loss": 1.7374, "step": 1599 }, { "epoch": 0.057299407309255646, "grad_norm": 1.761756181716919, "learning_rate": 0.0001996096633998306, "loss": 1.9732, "step": 1600 }, { "epoch": 0.05733521943882393, "grad_norm": 1.8080767393112183, "learning_rate": 0.0001996086388912851, "loss": 1.5933, "step": 1601 }, { "epoch": 0.05737103156839221, "grad_norm": 2.0051701068878174, "learning_rate": 0.00019960761304263386, "loss": 1.8956, "step": 1602 }, { "epoch": 0.0574068436979605, "grad_norm": 2.3873651027679443, "learning_rate": 0.0001996065858538907, "loss": 1.7061, "step": 1603 }, { "epoch": 0.057442655827528784, "grad_norm": 1.7301465272903442, "learning_rate": 0.00019960555732506937, "loss": 1.5289, "step": 1604 }, { "epoch": 0.05747846795709707, "grad_norm": 1.9653252363204956, "learning_rate": 0.00019960452745618375, "loss": 1.6136, "step": 1605 }, { "epoch": 0.057514280086665356, "grad_norm": 1.3157404661178589, "learning_rate": 0.0001996034962472477, "loss": 1.833, "step": 1606 }, { "epoch": 0.05755009221623364, "grad_norm": 1.559603214263916, "learning_rate": 0.0001996024636982751, "loss": 1.6462, "step": 1607 }, { "epoch": 0.05758590434580192, "grad_norm": 1.8252371549606323, "learning_rate": 0.0001996014298092798, "loss": 1.6637, "step": 1608 }, { "epoch": 0.057621716475370205, "grad_norm": 1.6523933410644531, "learning_rate": 0.00019960039458027576, "loss": 1.6047, "step": 1609 }, { "epoch": 0.057657528604938495, "grad_norm": 1.5484936237335205, "learning_rate": 0.00019959935801127686, "loss": 1.7067, "step": 1610 }, { "epoch": 0.05769334073450678, "grad_norm": 2.451167583465576, "learning_rate": 0.00019959832010229712, "loss": 1.419, "step": 1611 }, { "epoch": 0.05772915286407506, "grad_norm": 1.6821362972259521, "learning_rate": 0.0001995972808533504, "loss": 2.039, "step": 1612 }, { "epoch": 0.05776496499364335, "grad_norm": 1.6939760446548462, "learning_rate": 0.00019959624026445077, "loss": 1.6131, "step": 1613 }, { "epoch": 0.05780077712321163, "grad_norm": 1.5620489120483398, "learning_rate": 0.0001995951983356122, "loss": 1.6838, "step": 1614 }, { "epoch": 0.057836589252779916, "grad_norm": 2.6445093154907227, "learning_rate": 0.0001995941550668487, "loss": 1.8336, "step": 1615 }, { "epoch": 0.0578724013823482, "grad_norm": 1.760237693786621, "learning_rate": 0.00019959311045817432, "loss": 1.6198, "step": 1616 }, { "epoch": 0.05790821351191649, "grad_norm": 1.3533624410629272, "learning_rate": 0.00019959206450960307, "loss": 1.7068, "step": 1617 }, { "epoch": 0.05794402564148477, "grad_norm": 2.5859081745147705, "learning_rate": 0.0001995910172211491, "loss": 1.9612, "step": 1618 }, { "epoch": 0.057979837771053054, "grad_norm": 1.4971524477005005, "learning_rate": 0.0001995899685928264, "loss": 1.7951, "step": 1619 }, { "epoch": 0.058015649900621344, "grad_norm": 1.8958418369293213, "learning_rate": 0.0001995889186246492, "loss": 1.5899, "step": 1620 }, { "epoch": 0.05805146203018963, "grad_norm": 2.1714656352996826, "learning_rate": 0.0001995878673166315, "loss": 1.5648, "step": 1621 }, { "epoch": 0.05808727415975791, "grad_norm": 2.7349958419799805, "learning_rate": 0.00019958681466878756, "loss": 1.9454, "step": 1622 }, { "epoch": 0.05812308628932619, "grad_norm": 2.072175979614258, "learning_rate": 0.00019958576068113145, "loss": 1.5197, "step": 1623 }, { "epoch": 0.05815889841889448, "grad_norm": 2.4490714073181152, "learning_rate": 0.00019958470535367742, "loss": 1.5733, "step": 1624 }, { "epoch": 0.058194710548462765, "grad_norm": 1.668999195098877, "learning_rate": 0.00019958364868643958, "loss": 1.5685, "step": 1625 }, { "epoch": 0.05823052267803105, "grad_norm": 1.6361305713653564, "learning_rate": 0.00019958259067943225, "loss": 1.9354, "step": 1626 }, { "epoch": 0.05826633480759933, "grad_norm": 1.7708817720413208, "learning_rate": 0.0001995815313326696, "loss": 1.6394, "step": 1627 }, { "epoch": 0.05830214693716762, "grad_norm": 1.748575210571289, "learning_rate": 0.0001995804706461659, "loss": 1.8433, "step": 1628 }, { "epoch": 0.0583379590667359, "grad_norm": 1.4690053462982178, "learning_rate": 0.0001995794086199354, "loss": 1.5964, "step": 1629 }, { "epoch": 0.058373771196304186, "grad_norm": 1.3967971801757812, "learning_rate": 0.00019957834525399242, "loss": 1.657, "step": 1630 }, { "epoch": 0.058409583325872476, "grad_norm": 1.4848518371582031, "learning_rate": 0.00019957728054835125, "loss": 1.8209, "step": 1631 }, { "epoch": 0.05844539545544076, "grad_norm": 2.1335513591766357, "learning_rate": 0.0001995762145030262, "loss": 1.7809, "step": 1632 }, { "epoch": 0.05848120758500904, "grad_norm": 3.323810577392578, "learning_rate": 0.00019957514711803164, "loss": 1.4154, "step": 1633 }, { "epoch": 0.058517019714577324, "grad_norm": 1.775437593460083, "learning_rate": 0.00019957407839338193, "loss": 1.6845, "step": 1634 }, { "epoch": 0.058552831844145614, "grad_norm": 2.64595365524292, "learning_rate": 0.00019957300832909144, "loss": 1.5673, "step": 1635 }, { "epoch": 0.0585886439737139, "grad_norm": 1.9590623378753662, "learning_rate": 0.00019957193692517455, "loss": 1.7878, "step": 1636 }, { "epoch": 0.05862445610328218, "grad_norm": 1.5874866247177124, "learning_rate": 0.00019957086418164567, "loss": 1.5769, "step": 1637 }, { "epoch": 0.05866026823285047, "grad_norm": 2.0571467876434326, "learning_rate": 0.00019956979009851927, "loss": 1.5945, "step": 1638 }, { "epoch": 0.05869608036241875, "grad_norm": 1.961224913597107, "learning_rate": 0.00019956871467580978, "loss": 1.8875, "step": 1639 }, { "epoch": 0.058731892491987035, "grad_norm": 1.3154363632202148, "learning_rate": 0.00019956763791353165, "loss": 1.4418, "step": 1640 }, { "epoch": 0.05876770462155532, "grad_norm": 1.3892945051193237, "learning_rate": 0.00019956655981169942, "loss": 1.7232, "step": 1641 }, { "epoch": 0.05880351675112361, "grad_norm": 1.9630827903747559, "learning_rate": 0.00019956548037032752, "loss": 1.8138, "step": 1642 }, { "epoch": 0.05883932888069189, "grad_norm": 1.364547848701477, "learning_rate": 0.00019956439958943053, "loss": 1.8376, "step": 1643 }, { "epoch": 0.058875141010260174, "grad_norm": 1.8685306310653687, "learning_rate": 0.00019956331746902298, "loss": 1.8356, "step": 1644 }, { "epoch": 0.05891095313982846, "grad_norm": 2.003082036972046, "learning_rate": 0.00019956223400911943, "loss": 1.7245, "step": 1645 }, { "epoch": 0.058946765269396746, "grad_norm": 2.334925651550293, "learning_rate": 0.00019956114920973442, "loss": 1.9533, "step": 1646 }, { "epoch": 0.05898257739896503, "grad_norm": 2.2185535430908203, "learning_rate": 0.00019956006307088258, "loss": 1.6938, "step": 1647 }, { "epoch": 0.05901838952853331, "grad_norm": 1.9847118854522705, "learning_rate": 0.00019955897559257853, "loss": 1.7853, "step": 1648 }, { "epoch": 0.0590542016581016, "grad_norm": 2.876863956451416, "learning_rate": 0.00019955788677483686, "loss": 1.6786, "step": 1649 }, { "epoch": 0.059090013787669884, "grad_norm": 1.6888865232467651, "learning_rate": 0.00019955679661767226, "loss": 1.4412, "step": 1650 }, { "epoch": 0.05912582591723817, "grad_norm": 2.452845335006714, "learning_rate": 0.0001995557051210994, "loss": 1.7187, "step": 1651 }, { "epoch": 0.05916163804680645, "grad_norm": 1.8312182426452637, "learning_rate": 0.0001995546122851329, "loss": 1.9031, "step": 1652 }, { "epoch": 0.05919745017637474, "grad_norm": 1.6904535293579102, "learning_rate": 0.00019955351810978754, "loss": 1.9099, "step": 1653 }, { "epoch": 0.05923326230594302, "grad_norm": 1.750017762184143, "learning_rate": 0.000199552422595078, "loss": 1.8949, "step": 1654 }, { "epoch": 0.059269074435511306, "grad_norm": 1.7873555421829224, "learning_rate": 0.000199551325741019, "loss": 1.6408, "step": 1655 }, { "epoch": 0.059304886565079595, "grad_norm": 2.2488410472869873, "learning_rate": 0.00019955022754762535, "loss": 2.0474, "step": 1656 }, { "epoch": 0.05934069869464788, "grad_norm": 1.8136587142944336, "learning_rate": 0.0001995491280149118, "loss": 1.9466, "step": 1657 }, { "epoch": 0.05937651082421616, "grad_norm": 1.3177629709243774, "learning_rate": 0.00019954802714289315, "loss": 1.7, "step": 1658 }, { "epoch": 0.059412322953784444, "grad_norm": 1.3142410516738892, "learning_rate": 0.0001995469249315842, "loss": 1.6965, "step": 1659 }, { "epoch": 0.059448135083352734, "grad_norm": 1.6144740581512451, "learning_rate": 0.00019954582138099978, "loss": 1.8903, "step": 1660 }, { "epoch": 0.059483947212921016, "grad_norm": 1.29384446144104, "learning_rate": 0.00019954471649115475, "loss": 1.614, "step": 1661 }, { "epoch": 0.0595197593424893, "grad_norm": 1.294191598892212, "learning_rate": 0.00019954361026206394, "loss": 1.5788, "step": 1662 }, { "epoch": 0.05955557147205759, "grad_norm": 1.5210322141647339, "learning_rate": 0.00019954250269374227, "loss": 1.5188, "step": 1663 }, { "epoch": 0.05959138360162587, "grad_norm": 2.4071707725524902, "learning_rate": 0.0001995413937862046, "loss": 1.7177, "step": 1664 }, { "epoch": 0.059627195731194155, "grad_norm": 1.5951025485992432, "learning_rate": 0.0001995402835394659, "loss": 1.5914, "step": 1665 }, { "epoch": 0.05966300786076244, "grad_norm": 2.301365852355957, "learning_rate": 0.00019953917195354105, "loss": 1.6783, "step": 1666 }, { "epoch": 0.05969881999033073, "grad_norm": 1.3707059621810913, "learning_rate": 0.00019953805902844508, "loss": 1.9201, "step": 1667 }, { "epoch": 0.05973463211989901, "grad_norm": 1.7182759046554565, "learning_rate": 0.00019953694476419293, "loss": 1.8426, "step": 1668 }, { "epoch": 0.05977044424946729, "grad_norm": 1.4055579900741577, "learning_rate": 0.00019953582916079957, "loss": 1.4242, "step": 1669 }, { "epoch": 0.05980625637903558, "grad_norm": 1.643748164176941, "learning_rate": 0.00019953471221827998, "loss": 1.7121, "step": 1670 }, { "epoch": 0.059842068508603866, "grad_norm": 1.5763031244277954, "learning_rate": 0.00019953359393664927, "loss": 1.564, "step": 1671 }, { "epoch": 0.05987788063817215, "grad_norm": 1.790825605392456, "learning_rate": 0.0001995324743159224, "loss": 1.5434, "step": 1672 }, { "epoch": 0.05991369276774043, "grad_norm": 2.168774127960205, "learning_rate": 0.00019953135335611452, "loss": 1.6468, "step": 1673 }, { "epoch": 0.05994950489730872, "grad_norm": 1.6681972742080688, "learning_rate": 0.00019953023105724068, "loss": 1.9108, "step": 1674 }, { "epoch": 0.059985317026877004, "grad_norm": 1.9940334558486938, "learning_rate": 0.00019952910741931592, "loss": 1.5585, "step": 1675 }, { "epoch": 0.06002112915644529, "grad_norm": 2.587232828140259, "learning_rate": 0.0001995279824423554, "loss": 1.8064, "step": 1676 }, { "epoch": 0.06005694128601357, "grad_norm": 1.9780170917510986, "learning_rate": 0.0001995268561263743, "loss": 1.8043, "step": 1677 }, { "epoch": 0.06009275341558186, "grad_norm": 1.620010256767273, "learning_rate": 0.00019952572847138772, "loss": 1.865, "step": 1678 }, { "epoch": 0.06012856554515014, "grad_norm": 2.4111506938934326, "learning_rate": 0.00019952459947741082, "loss": 1.9095, "step": 1679 }, { "epoch": 0.060164377674718425, "grad_norm": 2.624497652053833, "learning_rate": 0.00019952346914445883, "loss": 1.902, "step": 1680 }, { "epoch": 0.060200189804286715, "grad_norm": 1.8209869861602783, "learning_rate": 0.00019952233747254691, "loss": 1.7628, "step": 1681 }, { "epoch": 0.060236001933855, "grad_norm": 1.5332977771759033, "learning_rate": 0.00019952120446169037, "loss": 1.6418, "step": 1682 }, { "epoch": 0.06027181406342328, "grad_norm": 1.8602702617645264, "learning_rate": 0.00019952007011190433, "loss": 1.918, "step": 1683 }, { "epoch": 0.06030762619299156, "grad_norm": 1.2004653215408325, "learning_rate": 0.00019951893442320416, "loss": 1.5955, "step": 1684 }, { "epoch": 0.06034343832255985, "grad_norm": 1.2076934576034546, "learning_rate": 0.0001995177973956051, "loss": 1.7053, "step": 1685 }, { "epoch": 0.060379250452128136, "grad_norm": 2.4891135692596436, "learning_rate": 0.00019951665902912243, "loss": 1.902, "step": 1686 }, { "epoch": 0.06041506258169642, "grad_norm": 1.7342336177825928, "learning_rate": 0.00019951551932377148, "loss": 1.7217, "step": 1687 }, { "epoch": 0.06045087471126471, "grad_norm": 2.313361406326294, "learning_rate": 0.00019951437827956758, "loss": 1.5658, "step": 1688 }, { "epoch": 0.06048668684083299, "grad_norm": 1.3781776428222656, "learning_rate": 0.0001995132358965261, "loss": 1.5952, "step": 1689 }, { "epoch": 0.060522498970401274, "grad_norm": 2.3602588176727295, "learning_rate": 0.00019951209217466238, "loss": 1.8622, "step": 1690 }, { "epoch": 0.06055831109996956, "grad_norm": 2.0375075340270996, "learning_rate": 0.0001995109471139918, "loss": 1.6982, "step": 1691 }, { "epoch": 0.06059412322953785, "grad_norm": 1.8000264167785645, "learning_rate": 0.0001995098007145298, "loss": 1.8062, "step": 1692 }, { "epoch": 0.06062993535910613, "grad_norm": 1.7053439617156982, "learning_rate": 0.00019950865297629184, "loss": 1.789, "step": 1693 }, { "epoch": 0.06066574748867441, "grad_norm": 3.238233804702759, "learning_rate": 0.00019950750389929328, "loss": 2.2441, "step": 1694 }, { "epoch": 0.0607015596182427, "grad_norm": 1.5956525802612305, "learning_rate": 0.0001995063534835496, "loss": 1.8782, "step": 1695 }, { "epoch": 0.060737371747810985, "grad_norm": 2.0592153072357178, "learning_rate": 0.0001995052017290763, "loss": 1.846, "step": 1696 }, { "epoch": 0.06077318387737927, "grad_norm": 1.7917789220809937, "learning_rate": 0.00019950404863588883, "loss": 1.7443, "step": 1697 }, { "epoch": 0.06080899600694755, "grad_norm": 1.8193100690841675, "learning_rate": 0.00019950289420400278, "loss": 2.2193, "step": 1698 }, { "epoch": 0.06084480813651584, "grad_norm": 1.6950918436050415, "learning_rate": 0.00019950173843343364, "loss": 1.9775, "step": 1699 }, { "epoch": 0.06088062026608412, "grad_norm": 1.3480393886566162, "learning_rate": 0.00019950058132419692, "loss": 1.5773, "step": 1700 }, { "epoch": 0.060916432395652406, "grad_norm": 2.6421098709106445, "learning_rate": 0.00019949942287630825, "loss": 1.8098, "step": 1701 }, { "epoch": 0.06095224452522069, "grad_norm": 1.7833307981491089, "learning_rate": 0.00019949826308978316, "loss": 1.758, "step": 1702 }, { "epoch": 0.06098805665478898, "grad_norm": 2.0496833324432373, "learning_rate": 0.0001994971019646373, "loss": 1.7918, "step": 1703 }, { "epoch": 0.06102386878435726, "grad_norm": 2.668501377105713, "learning_rate": 0.0001994959395008863, "loss": 1.8106, "step": 1704 }, { "epoch": 0.061059680913925544, "grad_norm": 1.7229629755020142, "learning_rate": 0.00019949477569854575, "loss": 1.7763, "step": 1705 }, { "epoch": 0.061095493043493834, "grad_norm": 2.5283937454223633, "learning_rate": 0.00019949361055763133, "loss": 1.9133, "step": 1706 }, { "epoch": 0.06113130517306212, "grad_norm": 2.3446457386016846, "learning_rate": 0.00019949244407815875, "loss": 1.5309, "step": 1707 }, { "epoch": 0.0611671173026304, "grad_norm": 1.1809579133987427, "learning_rate": 0.00019949127626014363, "loss": 1.4884, "step": 1708 }, { "epoch": 0.06120292943219868, "grad_norm": 1.2034236192703247, "learning_rate": 0.00019949010710360173, "loss": 1.5259, "step": 1709 }, { "epoch": 0.06123874156176697, "grad_norm": 2.4860334396362305, "learning_rate": 0.0001994889366085488, "loss": 1.9134, "step": 1710 }, { "epoch": 0.061274553691335255, "grad_norm": 1.7052736282348633, "learning_rate": 0.00019948776477500053, "loss": 1.8427, "step": 1711 }, { "epoch": 0.06131036582090354, "grad_norm": 1.8961480855941772, "learning_rate": 0.0001994865916029727, "loss": 1.6836, "step": 1712 }, { "epoch": 0.06134617795047183, "grad_norm": 1.532853603363037, "learning_rate": 0.00019948541709248116, "loss": 1.7492, "step": 1713 }, { "epoch": 0.06138199008004011, "grad_norm": 1.632897138595581, "learning_rate": 0.00019948424124354163, "loss": 1.7434, "step": 1714 }, { "epoch": 0.061417802209608394, "grad_norm": 2.043008804321289, "learning_rate": 0.00019948306405616996, "loss": 1.8477, "step": 1715 }, { "epoch": 0.061453614339176676, "grad_norm": 1.3530820608139038, "learning_rate": 0.00019948188553038198, "loss": 1.9704, "step": 1716 }, { "epoch": 0.061489426468744966, "grad_norm": 1.3419357538223267, "learning_rate": 0.0001994807056661936, "loss": 1.6517, "step": 1717 }, { "epoch": 0.06152523859831325, "grad_norm": 1.2969164848327637, "learning_rate": 0.00019947952446362058, "loss": 1.8418, "step": 1718 }, { "epoch": 0.06156105072788153, "grad_norm": 2.865709066390991, "learning_rate": 0.00019947834192267892, "loss": 1.862, "step": 1719 }, { "epoch": 0.06159686285744982, "grad_norm": 1.7418867349624634, "learning_rate": 0.00019947715804338447, "loss": 1.8999, "step": 1720 }, { "epoch": 0.061632674987018105, "grad_norm": 2.2759385108947754, "learning_rate": 0.00019947597282575318, "loss": 1.6186, "step": 1721 }, { "epoch": 0.06166848711658639, "grad_norm": 1.5044995546340942, "learning_rate": 0.00019947478626980097, "loss": 1.7092, "step": 1722 }, { "epoch": 0.06170429924615467, "grad_norm": 1.620406150817871, "learning_rate": 0.00019947359837554384, "loss": 1.6904, "step": 1723 }, { "epoch": 0.06174011137572296, "grad_norm": 1.9609277248382568, "learning_rate": 0.00019947240914299776, "loss": 1.6958, "step": 1724 }, { "epoch": 0.06177592350529124, "grad_norm": 2.1059086322784424, "learning_rate": 0.00019947121857217875, "loss": 1.7419, "step": 1725 }, { "epoch": 0.061811735634859526, "grad_norm": 2.0715277194976807, "learning_rate": 0.00019947002666310276, "loss": 1.6952, "step": 1726 }, { "epoch": 0.06184754776442781, "grad_norm": 1.6598073244094849, "learning_rate": 0.00019946883341578588, "loss": 1.6857, "step": 1727 }, { "epoch": 0.0618833598939961, "grad_norm": 1.5604004859924316, "learning_rate": 0.00019946763883024415, "loss": 1.6462, "step": 1728 }, { "epoch": 0.06191917202356438, "grad_norm": 2.105992555618286, "learning_rate": 0.00019946644290649368, "loss": 1.77, "step": 1729 }, { "epoch": 0.061954984153132664, "grad_norm": 2.2045366764068604, "learning_rate": 0.00019946524564455048, "loss": 2.0856, "step": 1730 }, { "epoch": 0.061990796282700954, "grad_norm": 1.7695341110229492, "learning_rate": 0.00019946404704443072, "loss": 1.7301, "step": 1731 }, { "epoch": 0.06202660841226924, "grad_norm": 1.6013516187667847, "learning_rate": 0.00019946284710615052, "loss": 1.5589, "step": 1732 }, { "epoch": 0.06206242054183752, "grad_norm": 1.9727685451507568, "learning_rate": 0.00019946164582972594, "loss": 1.8941, "step": 1733 }, { "epoch": 0.0620982326714058, "grad_norm": 1.8168443441390991, "learning_rate": 0.0001994604432151733, "loss": 1.5806, "step": 1734 }, { "epoch": 0.06213404480097409, "grad_norm": 1.5395933389663696, "learning_rate": 0.00019945923926250865, "loss": 1.88, "step": 1735 }, { "epoch": 0.062169856930542375, "grad_norm": 1.3251415491104126, "learning_rate": 0.00019945803397174823, "loss": 1.6235, "step": 1736 }, { "epoch": 0.06220566906011066, "grad_norm": 1.7758703231811523, "learning_rate": 0.00019945682734290825, "loss": 1.7609, "step": 1737 }, { "epoch": 0.06224148118967895, "grad_norm": 1.366156816482544, "learning_rate": 0.00019945561937600496, "loss": 1.629, "step": 1738 }, { "epoch": 0.06227729331924723, "grad_norm": 2.5820953845977783, "learning_rate": 0.0001994544100710546, "loss": 1.6179, "step": 1739 }, { "epoch": 0.06231310544881551, "grad_norm": 1.8584883213043213, "learning_rate": 0.0001994531994280734, "loss": 1.8395, "step": 1740 }, { "epoch": 0.062348917578383796, "grad_norm": 1.4980369806289673, "learning_rate": 0.00019945198744707776, "loss": 1.8487, "step": 1741 }, { "epoch": 0.062384729707952086, "grad_norm": 2.2591607570648193, "learning_rate": 0.00019945077412808387, "loss": 1.8087, "step": 1742 }, { "epoch": 0.06242054183752037, "grad_norm": 4.082409381866455, "learning_rate": 0.0001994495594711081, "loss": 1.7609, "step": 1743 }, { "epoch": 0.06245635396708865, "grad_norm": 1.8433887958526611, "learning_rate": 0.0001994483434761668, "loss": 1.6445, "step": 1744 }, { "epoch": 0.06249216609665694, "grad_norm": 1.5228683948516846, "learning_rate": 0.0001994471261432763, "loss": 1.8867, "step": 1745 }, { "epoch": 0.06252797822622522, "grad_norm": 1.2005826234817505, "learning_rate": 0.00019944590747245298, "loss": 1.7023, "step": 1746 }, { "epoch": 0.06256379035579351, "grad_norm": 1.6530516147613525, "learning_rate": 0.0001994446874637133, "loss": 1.9049, "step": 1747 }, { "epoch": 0.0625996024853618, "grad_norm": 2.352060556411743, "learning_rate": 0.00019944346611707356, "loss": 1.6247, "step": 1748 }, { "epoch": 0.06263541461493008, "grad_norm": 2.1047065258026123, "learning_rate": 0.0001994422434325503, "loss": 1.6373, "step": 1749 }, { "epoch": 0.06267122674449836, "grad_norm": 1.7999935150146484, "learning_rate": 0.00019944101941015994, "loss": 1.7002, "step": 1750 }, { "epoch": 0.06270703887406665, "grad_norm": 1.833964467048645, "learning_rate": 0.0001994397940499189, "loss": 1.6692, "step": 1751 }, { "epoch": 0.06274285100363493, "grad_norm": 1.7198271751403809, "learning_rate": 0.00019943856735184373, "loss": 1.9069, "step": 1752 }, { "epoch": 0.06277866313320321, "grad_norm": 2.55334734916687, "learning_rate": 0.00019943733931595086, "loss": 1.789, "step": 1753 }, { "epoch": 0.06281447526277151, "grad_norm": 1.8098565340042114, "learning_rate": 0.0001994361099422569, "loss": 1.7267, "step": 1754 }, { "epoch": 0.06285028739233979, "grad_norm": 1.1642190217971802, "learning_rate": 0.00019943487923077831, "loss": 1.4918, "step": 1755 }, { "epoch": 0.06288609952190807, "grad_norm": 2.4731671810150146, "learning_rate": 0.0001994336471815317, "loss": 1.7026, "step": 1756 }, { "epoch": 0.06292191165147636, "grad_norm": 1.744199514389038, "learning_rate": 0.00019943241379453364, "loss": 1.6319, "step": 1757 }, { "epoch": 0.06295772378104464, "grad_norm": 2.089003562927246, "learning_rate": 0.00019943117906980068, "loss": 1.5497, "step": 1758 }, { "epoch": 0.06299353591061292, "grad_norm": 1.1575398445129395, "learning_rate": 0.00019942994300734947, "loss": 1.7987, "step": 1759 }, { "epoch": 0.0630293480401812, "grad_norm": 1.753517746925354, "learning_rate": 0.00019942870560719664, "loss": 1.7975, "step": 1760 }, { "epoch": 0.06306516016974949, "grad_norm": 2.7385034561157227, "learning_rate": 0.00019942746686935883, "loss": 1.641, "step": 1761 }, { "epoch": 0.06310097229931778, "grad_norm": 3.074314594268799, "learning_rate": 0.0001994262267938527, "loss": 1.6846, "step": 1762 }, { "epoch": 0.06313678442888607, "grad_norm": 1.2961831092834473, "learning_rate": 0.00019942498538069495, "loss": 1.7411, "step": 1763 }, { "epoch": 0.06317259655845435, "grad_norm": 2.032168388366699, "learning_rate": 0.00019942374262990224, "loss": 1.8094, "step": 1764 }, { "epoch": 0.06320840868802263, "grad_norm": 1.753873586654663, "learning_rate": 0.00019942249854149136, "loss": 1.7005, "step": 1765 }, { "epoch": 0.06324422081759092, "grad_norm": 2.4005773067474365, "learning_rate": 0.000199421253115479, "loss": 1.8196, "step": 1766 }, { "epoch": 0.0632800329471592, "grad_norm": 1.66982102394104, "learning_rate": 0.0001994200063518819, "loss": 1.7387, "step": 1767 }, { "epoch": 0.06331584507672748, "grad_norm": 1.7754584550857544, "learning_rate": 0.0001994187582507169, "loss": 1.7563, "step": 1768 }, { "epoch": 0.06335165720629578, "grad_norm": 2.168553590774536, "learning_rate": 0.0001994175088120007, "loss": 1.5715, "step": 1769 }, { "epoch": 0.06338746933586406, "grad_norm": 2.892993450164795, "learning_rate": 0.00019941625803575019, "loss": 1.6514, "step": 1770 }, { "epoch": 0.06342328146543234, "grad_norm": 1.5328547954559326, "learning_rate": 0.00019941500592198216, "loss": 1.7424, "step": 1771 }, { "epoch": 0.06345909359500063, "grad_norm": 1.4240583181381226, "learning_rate": 0.00019941375247071346, "loss": 1.7308, "step": 1772 }, { "epoch": 0.06349490572456891, "grad_norm": 1.379332184791565, "learning_rate": 0.00019941249768196093, "loss": 1.6878, "step": 1773 }, { "epoch": 0.06353071785413719, "grad_norm": 1.5497807264328003, "learning_rate": 0.0001994112415557415, "loss": 1.6895, "step": 1774 }, { "epoch": 0.06356652998370547, "grad_norm": 2.0320279598236084, "learning_rate": 0.00019940998409207205, "loss": 1.9394, "step": 1775 }, { "epoch": 0.06360234211327377, "grad_norm": 1.5920579433441162, "learning_rate": 0.00019940872529096947, "loss": 1.4275, "step": 1776 }, { "epoch": 0.06363815424284205, "grad_norm": 1.7592054605484009, "learning_rate": 0.00019940746515245073, "loss": 1.8923, "step": 1777 }, { "epoch": 0.06367396637241034, "grad_norm": 1.5628163814544678, "learning_rate": 0.0001994062036765328, "loss": 1.9191, "step": 1778 }, { "epoch": 0.06370977850197862, "grad_norm": 2.930593729019165, "learning_rate": 0.0001994049408632326, "loss": 1.7287, "step": 1779 }, { "epoch": 0.0637455906315469, "grad_norm": 2.2003262042999268, "learning_rate": 0.00019940367671256716, "loss": 1.8437, "step": 1780 }, { "epoch": 0.06378140276111519, "grad_norm": 1.2991431951522827, "learning_rate": 0.00019940241122455346, "loss": 1.8275, "step": 1781 }, { "epoch": 0.06381721489068347, "grad_norm": 1.6640808582305908, "learning_rate": 0.00019940114439920853, "loss": 1.7935, "step": 1782 }, { "epoch": 0.06385302702025177, "grad_norm": 1.9864023923873901, "learning_rate": 0.00019939987623654944, "loss": 1.7738, "step": 1783 }, { "epoch": 0.06388883914982005, "grad_norm": 1.4357529878616333, "learning_rate": 0.0001993986067365932, "loss": 1.7587, "step": 1784 }, { "epoch": 0.06392465127938833, "grad_norm": 1.5999445915222168, "learning_rate": 0.00019939733589935694, "loss": 1.7795, "step": 1785 }, { "epoch": 0.06396046340895661, "grad_norm": 2.447521448135376, "learning_rate": 0.00019939606372485776, "loss": 1.9686, "step": 1786 }, { "epoch": 0.0639962755385249, "grad_norm": 1.655842900276184, "learning_rate": 0.00019939479021311273, "loss": 2.0018, "step": 1787 }, { "epoch": 0.06403208766809318, "grad_norm": 1.4144165515899658, "learning_rate": 0.000199393515364139, "loss": 1.5992, "step": 1788 }, { "epoch": 0.06406789979766146, "grad_norm": 2.399679183959961, "learning_rate": 0.00019939223917795373, "loss": 1.7488, "step": 1789 }, { "epoch": 0.06410371192722976, "grad_norm": 2.1720006465911865, "learning_rate": 0.00019939096165457411, "loss": 1.8954, "step": 1790 }, { "epoch": 0.06413952405679804, "grad_norm": 1.701594352722168, "learning_rate": 0.00019938968279401727, "loss": 1.9236, "step": 1791 }, { "epoch": 0.06417533618636632, "grad_norm": 1.7471704483032227, "learning_rate": 0.0001993884025963005, "loss": 1.9107, "step": 1792 }, { "epoch": 0.06421114831593461, "grad_norm": 1.7068006992340088, "learning_rate": 0.00019938712106144096, "loss": 1.9236, "step": 1793 }, { "epoch": 0.06424696044550289, "grad_norm": 1.5164170265197754, "learning_rate": 0.0001993858381894559, "loss": 2.014, "step": 1794 }, { "epoch": 0.06428277257507117, "grad_norm": 1.6961904764175415, "learning_rate": 0.00019938455398036257, "loss": 2.0187, "step": 1795 }, { "epoch": 0.06431858470463946, "grad_norm": 1.8207290172576904, "learning_rate": 0.00019938326843417826, "loss": 1.533, "step": 1796 }, { "epoch": 0.06435439683420775, "grad_norm": 2.8234851360321045, "learning_rate": 0.0001993819815509203, "loss": 1.7141, "step": 1797 }, { "epoch": 0.06439020896377604, "grad_norm": 2.3623080253601074, "learning_rate": 0.00019938069333060593, "loss": 2.0943, "step": 1798 }, { "epoch": 0.06442602109334432, "grad_norm": 1.3733361959457397, "learning_rate": 0.00019937940377325256, "loss": 1.6068, "step": 1799 }, { "epoch": 0.0644618332229126, "grad_norm": 1.828696370124817, "learning_rate": 0.0001993781128788775, "loss": 1.6581, "step": 1800 }, { "epoch": 0.06449764535248088, "grad_norm": 1.8768473863601685, "learning_rate": 0.00019937682064749811, "loss": 1.8659, "step": 1801 }, { "epoch": 0.06453345748204917, "grad_norm": 1.796538233757019, "learning_rate": 0.0001993755270791318, "loss": 1.6696, "step": 1802 }, { "epoch": 0.06456926961161745, "grad_norm": 2.022562026977539, "learning_rate": 0.00019937423217379594, "loss": 1.5342, "step": 1803 }, { "epoch": 0.06460508174118575, "grad_norm": 1.4857107400894165, "learning_rate": 0.00019937293593150796, "loss": 1.3543, "step": 1804 }, { "epoch": 0.06464089387075403, "grad_norm": 1.9994350671768188, "learning_rate": 0.00019937163835228534, "loss": 1.6907, "step": 1805 }, { "epoch": 0.06467670600032231, "grad_norm": 1.9123289585113525, "learning_rate": 0.0001993703394361455, "loss": 1.5374, "step": 1806 }, { "epoch": 0.0647125181298906, "grad_norm": 1.8468797206878662, "learning_rate": 0.0001993690391831059, "loss": 1.4841, "step": 1807 }, { "epoch": 0.06474833025945888, "grad_norm": 2.5288283824920654, "learning_rate": 0.00019936773759318408, "loss": 1.5264, "step": 1808 }, { "epoch": 0.06478414238902716, "grad_norm": 1.552699089050293, "learning_rate": 0.0001993664346663975, "loss": 1.6451, "step": 1809 }, { "epoch": 0.06481995451859544, "grad_norm": 1.2891765832901, "learning_rate": 0.00019936513040276371, "loss": 1.4697, "step": 1810 }, { "epoch": 0.06485576664816373, "grad_norm": 2.329200267791748, "learning_rate": 0.00019936382480230028, "loss": 1.55, "step": 1811 }, { "epoch": 0.06489157877773202, "grad_norm": 1.7297874689102173, "learning_rate": 0.00019936251786502478, "loss": 1.4764, "step": 1812 }, { "epoch": 0.0649273909073003, "grad_norm": 1.8127317428588867, "learning_rate": 0.00019936120959095473, "loss": 1.5498, "step": 1813 }, { "epoch": 0.06496320303686859, "grad_norm": 1.7599560022354126, "learning_rate": 0.0001993598999801078, "loss": 1.7308, "step": 1814 }, { "epoch": 0.06499901516643687, "grad_norm": 2.6168103218078613, "learning_rate": 0.00019935858903250155, "loss": 1.8455, "step": 1815 }, { "epoch": 0.06503482729600515, "grad_norm": 2.163510799407959, "learning_rate": 0.00019935727674815369, "loss": 1.6476, "step": 1816 }, { "epoch": 0.06507063942557344, "grad_norm": 1.5857428312301636, "learning_rate": 0.0001993559631270818, "loss": 1.771, "step": 1817 }, { "epoch": 0.06510645155514172, "grad_norm": 2.8085968494415283, "learning_rate": 0.0001993546481693036, "loss": 1.7182, "step": 1818 }, { "epoch": 0.06514226368471002, "grad_norm": 1.4190795421600342, "learning_rate": 0.00019935333187483676, "loss": 1.6694, "step": 1819 }, { "epoch": 0.0651780758142783, "grad_norm": 1.375416874885559, "learning_rate": 0.000199352014243699, "loss": 1.7377, "step": 1820 }, { "epoch": 0.06521388794384658, "grad_norm": 2.3879919052124023, "learning_rate": 0.00019935069527590805, "loss": 1.703, "step": 1821 }, { "epoch": 0.06524970007341487, "grad_norm": 1.3292384147644043, "learning_rate": 0.00019934937497148163, "loss": 1.657, "step": 1822 }, { "epoch": 0.06528551220298315, "grad_norm": 1.2860294580459595, "learning_rate": 0.00019934805333043752, "loss": 1.6298, "step": 1823 }, { "epoch": 0.06532132433255143, "grad_norm": 1.6525280475616455, "learning_rate": 0.00019934673035279353, "loss": 1.8054, "step": 1824 }, { "epoch": 0.06535713646211971, "grad_norm": 1.3846570253372192, "learning_rate": 0.00019934540603856743, "loss": 1.6464, "step": 1825 }, { "epoch": 0.06539294859168801, "grad_norm": 1.7888760566711426, "learning_rate": 0.000199344080387777, "loss": 1.9671, "step": 1826 }, { "epoch": 0.0654287607212563, "grad_norm": 1.9460965394973755, "learning_rate": 0.00019934275340044013, "loss": 1.7569, "step": 1827 }, { "epoch": 0.06546457285082458, "grad_norm": 2.1091198921203613, "learning_rate": 0.0001993414250765747, "loss": 1.6684, "step": 1828 }, { "epoch": 0.06550038498039286, "grad_norm": 1.813179612159729, "learning_rate": 0.0001993400954161985, "loss": 1.6355, "step": 1829 }, { "epoch": 0.06553619710996114, "grad_norm": 1.5191233158111572, "learning_rate": 0.00019933876441932943, "loss": 1.7589, "step": 1830 }, { "epoch": 0.06557200923952942, "grad_norm": 2.3240489959716797, "learning_rate": 0.00019933743208598546, "loss": 1.6427, "step": 1831 }, { "epoch": 0.06560782136909771, "grad_norm": 1.6643846035003662, "learning_rate": 0.00019933609841618445, "loss": 1.8053, "step": 1832 }, { "epoch": 0.065643633498666, "grad_norm": 2.014047145843506, "learning_rate": 0.0001993347634099444, "loss": 1.3374, "step": 1833 }, { "epoch": 0.06567944562823429, "grad_norm": 1.8650319576263428, "learning_rate": 0.00019933342706728323, "loss": 1.9769, "step": 1834 }, { "epoch": 0.06571525775780257, "grad_norm": 1.386111855506897, "learning_rate": 0.0001993320893882189, "loss": 1.2967, "step": 1835 }, { "epoch": 0.06575106988737085, "grad_norm": 1.4114490747451782, "learning_rate": 0.00019933075037276949, "loss": 1.5003, "step": 1836 }, { "epoch": 0.06578688201693914, "grad_norm": 1.2881723642349243, "learning_rate": 0.00019932941002095294, "loss": 2.0214, "step": 1837 }, { "epoch": 0.06582269414650742, "grad_norm": 1.827712059020996, "learning_rate": 0.00019932806833278726, "loss": 1.4548, "step": 1838 }, { "epoch": 0.0658585062760757, "grad_norm": 2.9244232177734375, "learning_rate": 0.0001993267253082906, "loss": 1.6894, "step": 1839 }, { "epoch": 0.065894318405644, "grad_norm": 1.268809199333191, "learning_rate": 0.00019932538094748098, "loss": 1.5501, "step": 1840 }, { "epoch": 0.06593013053521228, "grad_norm": 1.1067763566970825, "learning_rate": 0.00019932403525037642, "loss": 1.6284, "step": 1841 }, { "epoch": 0.06596594266478056, "grad_norm": 2.2531087398529053, "learning_rate": 0.00019932268821699513, "loss": 1.7967, "step": 1842 }, { "epoch": 0.06600175479434885, "grad_norm": 2.0509607791900635, "learning_rate": 0.0001993213398473552, "loss": 1.2527, "step": 1843 }, { "epoch": 0.06603756692391713, "grad_norm": 2.42293381690979, "learning_rate": 0.00019931999014147472, "loss": 1.8235, "step": 1844 }, { "epoch": 0.06607337905348541, "grad_norm": 1.7757221460342407, "learning_rate": 0.0001993186390993719, "loss": 2.0193, "step": 1845 }, { "epoch": 0.0661091911830537, "grad_norm": 1.6632944345474243, "learning_rate": 0.0001993172867210649, "loss": 1.8347, "step": 1846 }, { "epoch": 0.06614500331262199, "grad_norm": 2.6417300701141357, "learning_rate": 0.00019931593300657192, "loss": 2.097, "step": 1847 }, { "epoch": 0.06618081544219027, "grad_norm": 2.0074949264526367, "learning_rate": 0.00019931457795591118, "loss": 1.5325, "step": 1848 }, { "epoch": 0.06621662757175856, "grad_norm": 1.9241271018981934, "learning_rate": 0.00019931322156910088, "loss": 1.7955, "step": 1849 }, { "epoch": 0.06625243970132684, "grad_norm": 1.4078401327133179, "learning_rate": 0.0001993118638461593, "loss": 1.6546, "step": 1850 }, { "epoch": 0.06628825183089512, "grad_norm": 1.9178889989852905, "learning_rate": 0.00019931050478710468, "loss": 1.7818, "step": 1851 }, { "epoch": 0.0663240639604634, "grad_norm": 2.1118886470794678, "learning_rate": 0.00019930914439195534, "loss": 1.7646, "step": 1852 }, { "epoch": 0.06635987609003169, "grad_norm": 2.849180221557617, "learning_rate": 0.00019930778266072957, "loss": 1.6227, "step": 1853 }, { "epoch": 0.06639568821959999, "grad_norm": 2.1016101837158203, "learning_rate": 0.00019930641959344566, "loss": 1.263, "step": 1854 }, { "epoch": 0.06643150034916827, "grad_norm": 1.8419221639633179, "learning_rate": 0.00019930505519012197, "loss": 1.5502, "step": 1855 }, { "epoch": 0.06646731247873655, "grad_norm": 2.3808183670043945, "learning_rate": 0.0001993036894507769, "loss": 1.8669, "step": 1856 }, { "epoch": 0.06650312460830483, "grad_norm": 1.957215666770935, "learning_rate": 0.00019930232237542873, "loss": 1.7474, "step": 1857 }, { "epoch": 0.06653893673787312, "grad_norm": 1.7244082689285278, "learning_rate": 0.0001993009539640959, "loss": 1.6863, "step": 1858 }, { "epoch": 0.0665747488674414, "grad_norm": 1.3676176071166992, "learning_rate": 0.00019929958421679685, "loss": 1.6681, "step": 1859 }, { "epoch": 0.06661056099700968, "grad_norm": 1.637736439704895, "learning_rate": 0.00019929821313354997, "loss": 1.8369, "step": 1860 }, { "epoch": 0.06664637312657797, "grad_norm": 1.4250011444091797, "learning_rate": 0.00019929684071437373, "loss": 1.7336, "step": 1861 }, { "epoch": 0.06668218525614626, "grad_norm": 1.5548502206802368, "learning_rate": 0.00019929546695928658, "loss": 1.8462, "step": 1862 }, { "epoch": 0.06671799738571454, "grad_norm": 1.8258202075958252, "learning_rate": 0.000199294091868307, "loss": 1.6896, "step": 1863 }, { "epoch": 0.06675380951528283, "grad_norm": 1.2990753650665283, "learning_rate": 0.0001992927154414535, "loss": 1.6857, "step": 1864 }, { "epoch": 0.06678962164485111, "grad_norm": 1.2354774475097656, "learning_rate": 0.00019929133767874454, "loss": 1.5531, "step": 1865 }, { "epoch": 0.0668254337744194, "grad_norm": 1.4240732192993164, "learning_rate": 0.0001992899585801988, "loss": 1.8465, "step": 1866 }, { "epoch": 0.06686124590398768, "grad_norm": 1.7457743883132935, "learning_rate": 0.0001992885781458347, "loss": 1.7631, "step": 1867 }, { "epoch": 0.06689705803355596, "grad_norm": 1.6174548864364624, "learning_rate": 0.0001992871963756708, "loss": 1.5175, "step": 1868 }, { "epoch": 0.06693287016312426, "grad_norm": 1.0970457792282104, "learning_rate": 0.00019928581326972582, "loss": 1.6494, "step": 1869 }, { "epoch": 0.06696868229269254, "grad_norm": 1.3978979587554932, "learning_rate": 0.00019928442882801825, "loss": 1.5092, "step": 1870 }, { "epoch": 0.06700449442226082, "grad_norm": 1.5778833627700806, "learning_rate": 0.00019928304305056677, "loss": 1.7631, "step": 1871 }, { "epoch": 0.0670403065518291, "grad_norm": 1.6992747783660889, "learning_rate": 0.00019928165593739, "loss": 1.8914, "step": 1872 }, { "epoch": 0.06707611868139739, "grad_norm": 1.8649605512619019, "learning_rate": 0.00019928026748850663, "loss": 1.8694, "step": 1873 }, { "epoch": 0.06711193081096567, "grad_norm": 1.2663445472717285, "learning_rate": 0.00019927887770393533, "loss": 1.757, "step": 1874 }, { "epoch": 0.06714774294053395, "grad_norm": 1.696965217590332, "learning_rate": 0.0001992774865836948, "loss": 1.7698, "step": 1875 }, { "epoch": 0.06718355507010225, "grad_norm": 2.0695016384124756, "learning_rate": 0.0001992760941278037, "loss": 1.6721, "step": 1876 }, { "epoch": 0.06721936719967053, "grad_norm": 1.6472669839859009, "learning_rate": 0.00019927470033628087, "loss": 1.5199, "step": 1877 }, { "epoch": 0.06725517932923882, "grad_norm": 3.179795742034912, "learning_rate": 0.00019927330520914496, "loss": 1.4682, "step": 1878 }, { "epoch": 0.0672909914588071, "grad_norm": 1.6882466077804565, "learning_rate": 0.00019927190874641478, "loss": 1.8529, "step": 1879 }, { "epoch": 0.06732680358837538, "grad_norm": 1.6953445672988892, "learning_rate": 0.00019927051094810913, "loss": 1.9795, "step": 1880 }, { "epoch": 0.06736261571794366, "grad_norm": 1.3161673545837402, "learning_rate": 0.00019926911181424682, "loss": 1.7497, "step": 1881 }, { "epoch": 0.06739842784751195, "grad_norm": 1.8265464305877686, "learning_rate": 0.00019926771134484662, "loss": 1.5825, "step": 1882 }, { "epoch": 0.06743423997708024, "grad_norm": 2.6041676998138428, "learning_rate": 0.00019926630953992746, "loss": 1.9153, "step": 1883 }, { "epoch": 0.06747005210664853, "grad_norm": 2.21744441986084, "learning_rate": 0.00019926490639950812, "loss": 1.8852, "step": 1884 }, { "epoch": 0.06750586423621681, "grad_norm": 2.1859962940216064, "learning_rate": 0.00019926350192360753, "loss": 1.441, "step": 1885 }, { "epoch": 0.06754167636578509, "grad_norm": 1.6695815324783325, "learning_rate": 0.00019926209611224454, "loss": 1.5176, "step": 1886 }, { "epoch": 0.06757748849535337, "grad_norm": 1.3804771900177002, "learning_rate": 0.00019926068896543807, "loss": 1.5485, "step": 1887 }, { "epoch": 0.06761330062492166, "grad_norm": 1.9427920579910278, "learning_rate": 0.0001992592804832071, "loss": 1.6327, "step": 1888 }, { "epoch": 0.06764911275448994, "grad_norm": 1.2968367338180542, "learning_rate": 0.00019925787066557053, "loss": 1.7162, "step": 1889 }, { "epoch": 0.06768492488405824, "grad_norm": 1.447363257408142, "learning_rate": 0.00019925645951254735, "loss": 1.6678, "step": 1890 }, { "epoch": 0.06772073701362652, "grad_norm": 1.4006659984588623, "learning_rate": 0.00019925504702415653, "loss": 1.7161, "step": 1891 }, { "epoch": 0.0677565491431948, "grad_norm": 1.27907395362854, "learning_rate": 0.00019925363320041708, "loss": 1.5937, "step": 1892 }, { "epoch": 0.06779236127276309, "grad_norm": 1.2862017154693604, "learning_rate": 0.00019925221804134805, "loss": 1.6961, "step": 1893 }, { "epoch": 0.06782817340233137, "grad_norm": 2.0907084941864014, "learning_rate": 0.0001992508015469684, "loss": 1.5839, "step": 1894 }, { "epoch": 0.06786398553189965, "grad_norm": 2.4519996643066406, "learning_rate": 0.00019924938371729728, "loss": 1.772, "step": 1895 }, { "epoch": 0.06789979766146793, "grad_norm": 2.106823205947876, "learning_rate": 0.00019924796455235373, "loss": 1.9488, "step": 1896 }, { "epoch": 0.06793560979103623, "grad_norm": 1.8093961477279663, "learning_rate": 0.00019924654405215682, "loss": 1.5178, "step": 1897 }, { "epoch": 0.06797142192060451, "grad_norm": 2.3577721118927, "learning_rate": 0.00019924512221672572, "loss": 1.6168, "step": 1898 }, { "epoch": 0.0680072340501728, "grad_norm": 1.4707239866256714, "learning_rate": 0.00019924369904607945, "loss": 1.6463, "step": 1899 }, { "epoch": 0.06804304617974108, "grad_norm": 1.9914259910583496, "learning_rate": 0.00019924227454023728, "loss": 1.6292, "step": 1900 }, { "epoch": 0.06807885830930936, "grad_norm": 1.2515363693237305, "learning_rate": 0.0001992408486992183, "loss": 1.754, "step": 1901 }, { "epoch": 0.06811467043887764, "grad_norm": 2.06872296333313, "learning_rate": 0.00019923942152304169, "loss": 1.3609, "step": 1902 }, { "epoch": 0.06815048256844593, "grad_norm": 1.7905656099319458, "learning_rate": 0.0001992379930117267, "loss": 1.7038, "step": 1903 }, { "epoch": 0.06818629469801422, "grad_norm": 1.7369552850723267, "learning_rate": 0.00019923656316529252, "loss": 1.5613, "step": 1904 }, { "epoch": 0.06822210682758251, "grad_norm": 1.4253979921340942, "learning_rate": 0.00019923513198375837, "loss": 1.53, "step": 1905 }, { "epoch": 0.06825791895715079, "grad_norm": 1.3576210737228394, "learning_rate": 0.00019923369946714354, "loss": 1.9039, "step": 1906 }, { "epoch": 0.06829373108671907, "grad_norm": 2.030214548110962, "learning_rate": 0.00019923226561546726, "loss": 1.5617, "step": 1907 }, { "epoch": 0.06832954321628736, "grad_norm": 2.2546305656433105, "learning_rate": 0.00019923083042874885, "loss": 2.2339, "step": 1908 }, { "epoch": 0.06836535534585564, "grad_norm": 2.327280044555664, "learning_rate": 0.00019922939390700767, "loss": 1.5703, "step": 1909 }, { "epoch": 0.06840116747542392, "grad_norm": 2.571589708328247, "learning_rate": 0.00019922795605026295, "loss": 1.6462, "step": 1910 }, { "epoch": 0.0684369796049922, "grad_norm": 1.4403351545333862, "learning_rate": 0.00019922651685853407, "loss": 1.6363, "step": 1911 }, { "epoch": 0.0684727917345605, "grad_norm": 2.2027952671051025, "learning_rate": 0.0001992250763318404, "loss": 1.6944, "step": 1912 }, { "epoch": 0.06850860386412878, "grad_norm": 2.0483181476593018, "learning_rate": 0.00019922363447020134, "loss": 2.0797, "step": 1913 }, { "epoch": 0.06854441599369707, "grad_norm": 1.3529871702194214, "learning_rate": 0.00019922219127363624, "loss": 1.6436, "step": 1914 }, { "epoch": 0.06858022812326535, "grad_norm": 1.9239258766174316, "learning_rate": 0.00019922074674216456, "loss": 1.5636, "step": 1915 }, { "epoch": 0.06861604025283363, "grad_norm": 2.6495797634124756, "learning_rate": 0.00019921930087580573, "loss": 1.9851, "step": 1916 }, { "epoch": 0.06865185238240192, "grad_norm": 1.5434764623641968, "learning_rate": 0.00019921785367457917, "loss": 1.8326, "step": 1917 }, { "epoch": 0.0686876645119702, "grad_norm": 1.602738857269287, "learning_rate": 0.00019921640513850437, "loss": 1.5884, "step": 1918 }, { "epoch": 0.0687234766415385, "grad_norm": 1.9453281164169312, "learning_rate": 0.00019921495526760083, "loss": 1.6429, "step": 1919 }, { "epoch": 0.06875928877110678, "grad_norm": 1.6963716745376587, "learning_rate": 0.00019921350406188805, "loss": 1.642, "step": 1920 }, { "epoch": 0.06879510090067506, "grad_norm": 1.7032421827316284, "learning_rate": 0.00019921205152138556, "loss": 1.5783, "step": 1921 }, { "epoch": 0.06883091303024334, "grad_norm": 1.9125220775604248, "learning_rate": 0.00019921059764611284, "loss": 1.9889, "step": 1922 }, { "epoch": 0.06886672515981163, "grad_norm": 1.6646088361740112, "learning_rate": 0.00019920914243608956, "loss": 1.7274, "step": 1923 }, { "epoch": 0.06890253728937991, "grad_norm": 1.3573722839355469, "learning_rate": 0.0001992076858913352, "loss": 1.3907, "step": 1924 }, { "epoch": 0.06893834941894819, "grad_norm": 1.8801019191741943, "learning_rate": 0.0001992062280118694, "loss": 1.8737, "step": 1925 }, { "epoch": 0.06897416154851649, "grad_norm": 1.404797911643982, "learning_rate": 0.0001992047687977118, "loss": 1.5543, "step": 1926 }, { "epoch": 0.06900997367808477, "grad_norm": 2.4929728507995605, "learning_rate": 0.00019920330824888197, "loss": 1.8148, "step": 1927 }, { "epoch": 0.06904578580765305, "grad_norm": 1.5967156887054443, "learning_rate": 0.0001992018463653996, "loss": 1.7098, "step": 1928 }, { "epoch": 0.06908159793722134, "grad_norm": 1.629167079925537, "learning_rate": 0.00019920038314728434, "loss": 1.6494, "step": 1929 }, { "epoch": 0.06911741006678962, "grad_norm": 1.807207703590393, "learning_rate": 0.00019919891859455588, "loss": 1.8693, "step": 1930 }, { "epoch": 0.0691532221963579, "grad_norm": 1.9468473196029663, "learning_rate": 0.00019919745270723395, "loss": 1.7264, "step": 1931 }, { "epoch": 0.06918903432592619, "grad_norm": 1.7176305055618286, "learning_rate": 0.00019919598548533824, "loss": 1.8964, "step": 1932 }, { "epoch": 0.06922484645549448, "grad_norm": 2.3279569149017334, "learning_rate": 0.00019919451692888848, "loss": 1.7493, "step": 1933 }, { "epoch": 0.06926065858506276, "grad_norm": 2.4404523372650146, "learning_rate": 0.00019919304703790446, "loss": 1.8917, "step": 1934 }, { "epoch": 0.06929647071463105, "grad_norm": 1.455718755722046, "learning_rate": 0.00019919157581240596, "loss": 1.6557, "step": 1935 }, { "epoch": 0.06933228284419933, "grad_norm": 2.637948751449585, "learning_rate": 0.00019919010325241275, "loss": 1.4312, "step": 1936 }, { "epoch": 0.06936809497376761, "grad_norm": 1.4223843812942505, "learning_rate": 0.00019918862935794463, "loss": 2.0284, "step": 1937 }, { "epoch": 0.0694039071033359, "grad_norm": 2.231555461883545, "learning_rate": 0.00019918715412902142, "loss": 1.9927, "step": 1938 }, { "epoch": 0.06943971923290418, "grad_norm": 1.6516687870025635, "learning_rate": 0.00019918567756566305, "loss": 1.5935, "step": 1939 }, { "epoch": 0.06947553136247248, "grad_norm": 1.9006787538528442, "learning_rate": 0.0001991841996678893, "loss": 1.2902, "step": 1940 }, { "epoch": 0.06951134349204076, "grad_norm": 2.0806474685668945, "learning_rate": 0.0001991827204357201, "loss": 1.3603, "step": 1941 }, { "epoch": 0.06954715562160904, "grad_norm": 1.810377836227417, "learning_rate": 0.0001991812398691753, "loss": 1.6762, "step": 1942 }, { "epoch": 0.06958296775117732, "grad_norm": 1.6597081422805786, "learning_rate": 0.00019917975796827488, "loss": 1.6292, "step": 1943 }, { "epoch": 0.06961877988074561, "grad_norm": 1.8197176456451416, "learning_rate": 0.00019917827473303875, "loss": 1.5692, "step": 1944 }, { "epoch": 0.06965459201031389, "grad_norm": 1.2852716445922852, "learning_rate": 0.00019917679016348685, "loss": 1.6318, "step": 1945 }, { "epoch": 0.06969040413988217, "grad_norm": 1.7549954652786255, "learning_rate": 0.00019917530425963916, "loss": 1.8909, "step": 1946 }, { "epoch": 0.06972621626945047, "grad_norm": 1.5758832693099976, "learning_rate": 0.0001991738170215157, "loss": 1.5813, "step": 1947 }, { "epoch": 0.06976202839901875, "grad_norm": 2.270517587661743, "learning_rate": 0.00019917232844913644, "loss": 1.8209, "step": 1948 }, { "epoch": 0.06979784052858704, "grad_norm": 2.2871134281158447, "learning_rate": 0.00019917083854252142, "loss": 1.7456, "step": 1949 }, { "epoch": 0.06983365265815532, "grad_norm": 1.7921063899993896, "learning_rate": 0.00019916934730169073, "loss": 1.5367, "step": 1950 }, { "epoch": 0.0698694647877236, "grad_norm": 1.8753576278686523, "learning_rate": 0.00019916785472666435, "loss": 1.6617, "step": 1951 }, { "epoch": 0.06990527691729188, "grad_norm": 2.1969332695007324, "learning_rate": 0.0001991663608174624, "loss": 1.4074, "step": 1952 }, { "epoch": 0.06994108904686017, "grad_norm": 1.6860895156860352, "learning_rate": 0.000199164865574105, "loss": 1.9061, "step": 1953 }, { "epoch": 0.06997690117642846, "grad_norm": 1.7256300449371338, "learning_rate": 0.00019916336899661224, "loss": 1.7184, "step": 1954 }, { "epoch": 0.07001271330599675, "grad_norm": 1.6683882474899292, "learning_rate": 0.00019916187108500428, "loss": 1.6855, "step": 1955 }, { "epoch": 0.07004852543556503, "grad_norm": 1.431535243988037, "learning_rate": 0.00019916037183930122, "loss": 1.8098, "step": 1956 }, { "epoch": 0.07008433756513331, "grad_norm": 2.2073168754577637, "learning_rate": 0.00019915887125952327, "loss": 1.5942, "step": 1957 }, { "epoch": 0.0701201496947016, "grad_norm": 1.7813090085983276, "learning_rate": 0.00019915736934569066, "loss": 1.7848, "step": 1958 }, { "epoch": 0.07015596182426988, "grad_norm": 1.7625131607055664, "learning_rate": 0.0001991558660978235, "loss": 1.6697, "step": 1959 }, { "epoch": 0.07019177395383816, "grad_norm": 1.319684386253357, "learning_rate": 0.0001991543615159421, "loss": 1.8004, "step": 1960 }, { "epoch": 0.07022758608340644, "grad_norm": 1.4817261695861816, "learning_rate": 0.00019915285560006662, "loss": 1.7313, "step": 1961 }, { "epoch": 0.07026339821297474, "grad_norm": 1.8547471761703491, "learning_rate": 0.00019915134835021738, "loss": 1.8569, "step": 1962 }, { "epoch": 0.07029921034254302, "grad_norm": 1.6080464124679565, "learning_rate": 0.00019914983976641466, "loss": 1.4447, "step": 1963 }, { "epoch": 0.0703350224721113, "grad_norm": 1.452889323234558, "learning_rate": 0.00019914832984867874, "loss": 1.6103, "step": 1964 }, { "epoch": 0.07037083460167959, "grad_norm": 1.8950872421264648, "learning_rate": 0.0001991468185970299, "loss": 1.7781, "step": 1965 }, { "epoch": 0.07040664673124787, "grad_norm": 1.769492745399475, "learning_rate": 0.00019914530601148855, "loss": 1.708, "step": 1966 }, { "epoch": 0.07044245886081615, "grad_norm": 1.1686387062072754, "learning_rate": 0.000199143792092075, "loss": 1.6729, "step": 1967 }, { "epoch": 0.07047827099038444, "grad_norm": 1.3533289432525635, "learning_rate": 0.00019914227683880958, "loss": 1.6236, "step": 1968 }, { "epoch": 0.07051408311995273, "grad_norm": 1.8892587423324585, "learning_rate": 0.0001991407602517127, "loss": 1.7556, "step": 1969 }, { "epoch": 0.07054989524952102, "grad_norm": 1.8320790529251099, "learning_rate": 0.00019913924233080482, "loss": 1.4439, "step": 1970 }, { "epoch": 0.0705857073790893, "grad_norm": 2.168354034423828, "learning_rate": 0.0001991377230761063, "loss": 1.8053, "step": 1971 }, { "epoch": 0.07062151950865758, "grad_norm": 1.4819917678833008, "learning_rate": 0.00019913620248763756, "loss": 1.6767, "step": 1972 }, { "epoch": 0.07065733163822586, "grad_norm": 1.7416330575942993, "learning_rate": 0.0001991346805654191, "loss": 1.7843, "step": 1973 }, { "epoch": 0.07069314376779415, "grad_norm": 2.728843927383423, "learning_rate": 0.00019913315730947143, "loss": 1.7568, "step": 1974 }, { "epoch": 0.07072895589736243, "grad_norm": 1.4552961587905884, "learning_rate": 0.00019913163271981495, "loss": 1.3882, "step": 1975 }, { "epoch": 0.07076476802693073, "grad_norm": 1.9386544227600098, "learning_rate": 0.00019913010679647027, "loss": 1.5637, "step": 1976 }, { "epoch": 0.07080058015649901, "grad_norm": 3.1994082927703857, "learning_rate": 0.00019912857953945784, "loss": 1.7099, "step": 1977 }, { "epoch": 0.07083639228606729, "grad_norm": 1.4596532583236694, "learning_rate": 0.00019912705094879827, "loss": 1.5022, "step": 1978 }, { "epoch": 0.07087220441563558, "grad_norm": 1.96254563331604, "learning_rate": 0.00019912552102451206, "loss": 1.6754, "step": 1979 }, { "epoch": 0.07090801654520386, "grad_norm": 1.8212988376617432, "learning_rate": 0.00019912398976661984, "loss": 1.6848, "step": 1980 }, { "epoch": 0.07094382867477214, "grad_norm": 1.8282296657562256, "learning_rate": 0.0001991224571751422, "loss": 1.6715, "step": 1981 }, { "epoch": 0.07097964080434042, "grad_norm": 2.6742913722991943, "learning_rate": 0.00019912092325009975, "loss": 1.4798, "step": 1982 }, { "epoch": 0.07101545293390872, "grad_norm": 2.115812063217163, "learning_rate": 0.00019911938799151315, "loss": 1.8264, "step": 1983 }, { "epoch": 0.071051265063477, "grad_norm": 1.2611589431762695, "learning_rate": 0.00019911785139940303, "loss": 1.6957, "step": 1984 }, { "epoch": 0.07108707719304529, "grad_norm": 1.0900263786315918, "learning_rate": 0.00019911631347379008, "loss": 1.563, "step": 1985 }, { "epoch": 0.07112288932261357, "grad_norm": 1.8478407859802246, "learning_rate": 0.00019911477421469495, "loss": 1.6451, "step": 1986 }, { "epoch": 0.07115870145218185, "grad_norm": 2.0498433113098145, "learning_rate": 0.0001991132336221384, "loss": 1.4951, "step": 1987 }, { "epoch": 0.07119451358175014, "grad_norm": 1.6745867729187012, "learning_rate": 0.00019911169169614117, "loss": 1.7767, "step": 1988 }, { "epoch": 0.07123032571131842, "grad_norm": 1.856895923614502, "learning_rate": 0.00019911014843672394, "loss": 1.5433, "step": 1989 }, { "epoch": 0.07126613784088671, "grad_norm": 1.378844976425171, "learning_rate": 0.0001991086038439075, "loss": 1.7412, "step": 1990 }, { "epoch": 0.071301949970455, "grad_norm": 1.3555171489715576, "learning_rate": 0.00019910705791771263, "loss": 1.5704, "step": 1991 }, { "epoch": 0.07133776210002328, "grad_norm": 1.2136765718460083, "learning_rate": 0.00019910551065816017, "loss": 1.6872, "step": 1992 }, { "epoch": 0.07137357422959156, "grad_norm": 2.4193828105926514, "learning_rate": 0.0001991039620652709, "loss": 1.693, "step": 1993 }, { "epoch": 0.07140938635915985, "grad_norm": 1.5262309312820435, "learning_rate": 0.00019910241213906565, "loss": 1.6757, "step": 1994 }, { "epoch": 0.07144519848872813, "grad_norm": 1.5123639106750488, "learning_rate": 0.00019910086087956527, "loss": 1.556, "step": 1995 }, { "epoch": 0.07148101061829641, "grad_norm": 1.8455959558486938, "learning_rate": 0.00019909930828679063, "loss": 1.3509, "step": 1996 }, { "epoch": 0.07151682274786471, "grad_norm": 1.946921706199646, "learning_rate": 0.0001990977543607626, "loss": 1.7793, "step": 1997 }, { "epoch": 0.07155263487743299, "grad_norm": 2.3060195446014404, "learning_rate": 0.00019909619910150216, "loss": 1.8093, "step": 1998 }, { "epoch": 0.07158844700700127, "grad_norm": 1.4833565950393677, "learning_rate": 0.0001990946425090302, "loss": 1.5529, "step": 1999 }, { "epoch": 0.07162425913656956, "grad_norm": 1.3298144340515137, "learning_rate": 0.00019909308458336759, "loss": 1.7373, "step": 2000 }, { "epoch": 0.07166007126613784, "grad_norm": 1.6872481107711792, "learning_rate": 0.0001990915253245354, "loss": 1.7242, "step": 2001 }, { "epoch": 0.07169588339570612, "grad_norm": 1.7926667928695679, "learning_rate": 0.0001990899647325545, "loss": 1.6452, "step": 2002 }, { "epoch": 0.0717316955252744, "grad_norm": 1.7860251665115356, "learning_rate": 0.000199088402807446, "loss": 1.6085, "step": 2003 }, { "epoch": 0.0717675076548427, "grad_norm": 2.7218711376190186, "learning_rate": 0.00019908683954923082, "loss": 1.9528, "step": 2004 }, { "epoch": 0.07180331978441098, "grad_norm": 2.7485177516937256, "learning_rate": 0.00019908527495793004, "loss": 1.5438, "step": 2005 }, { "epoch": 0.07183913191397927, "grad_norm": 2.6853740215301514, "learning_rate": 0.00019908370903356468, "loss": 1.7663, "step": 2006 }, { "epoch": 0.07187494404354755, "grad_norm": 1.6789944171905518, "learning_rate": 0.00019908214177615584, "loss": 1.919, "step": 2007 }, { "epoch": 0.07191075617311583, "grad_norm": 2.4104981422424316, "learning_rate": 0.00019908057318572458, "loss": 1.3113, "step": 2008 }, { "epoch": 0.07194656830268412, "grad_norm": 1.2793664932250977, "learning_rate": 0.000199079003262292, "loss": 1.4809, "step": 2009 }, { "epoch": 0.0719823804322524, "grad_norm": 1.5413146018981934, "learning_rate": 0.00019907743200587926, "loss": 1.5148, "step": 2010 }, { "epoch": 0.07201819256182068, "grad_norm": 1.1401458978652954, "learning_rate": 0.00019907585941650747, "loss": 1.733, "step": 2011 }, { "epoch": 0.07205400469138898, "grad_norm": 1.4969557523727417, "learning_rate": 0.00019907428549419777, "loss": 1.7206, "step": 2012 }, { "epoch": 0.07208981682095726, "grad_norm": 1.5214531421661377, "learning_rate": 0.00019907271023897138, "loss": 1.8043, "step": 2013 }, { "epoch": 0.07212562895052554, "grad_norm": 1.6244412660598755, "learning_rate": 0.00019907113365084947, "loss": 1.7689, "step": 2014 }, { "epoch": 0.07216144108009383, "grad_norm": 1.6461654901504517, "learning_rate": 0.0001990695557298532, "loss": 1.5748, "step": 2015 }, { "epoch": 0.07219725320966211, "grad_norm": 1.6046760082244873, "learning_rate": 0.0001990679764760039, "loss": 1.8966, "step": 2016 }, { "epoch": 0.07223306533923039, "grad_norm": 1.2768783569335938, "learning_rate": 0.00019906639588932276, "loss": 1.6619, "step": 2017 }, { "epoch": 0.07226887746879868, "grad_norm": 1.3331384658813477, "learning_rate": 0.00019906481396983103, "loss": 1.6712, "step": 2018 }, { "epoch": 0.07230468959836697, "grad_norm": 1.4289017915725708, "learning_rate": 0.00019906323071755005, "loss": 1.7709, "step": 2019 }, { "epoch": 0.07234050172793526, "grad_norm": 1.8819347620010376, "learning_rate": 0.00019906164613250104, "loss": 1.9157, "step": 2020 }, { "epoch": 0.07237631385750354, "grad_norm": 1.380541443824768, "learning_rate": 0.00019906006021470538, "loss": 1.5728, "step": 2021 }, { "epoch": 0.07241212598707182, "grad_norm": 1.3870627880096436, "learning_rate": 0.00019905847296418437, "loss": 1.6477, "step": 2022 }, { "epoch": 0.0724479381166401, "grad_norm": 1.8019845485687256, "learning_rate": 0.0001990568843809594, "loss": 1.4675, "step": 2023 }, { "epoch": 0.07248375024620839, "grad_norm": 1.3413184881210327, "learning_rate": 0.00019905529446505183, "loss": 1.588, "step": 2024 }, { "epoch": 0.07251956237577667, "grad_norm": 1.5961971282958984, "learning_rate": 0.00019905370321648302, "loss": 1.6738, "step": 2025 }, { "epoch": 0.07255537450534497, "grad_norm": 2.0965704917907715, "learning_rate": 0.00019905211063527442, "loss": 1.5145, "step": 2026 }, { "epoch": 0.07259118663491325, "grad_norm": 1.2881782054901123, "learning_rate": 0.00019905051672144746, "loss": 1.6991, "step": 2027 }, { "epoch": 0.07262699876448153, "grad_norm": 1.2879416942596436, "learning_rate": 0.00019904892147502352, "loss": 1.795, "step": 2028 }, { "epoch": 0.07266281089404981, "grad_norm": 1.2281581163406372, "learning_rate": 0.00019904732489602417, "loss": 1.5426, "step": 2029 }, { "epoch": 0.0726986230236181, "grad_norm": 1.7174360752105713, "learning_rate": 0.00019904572698447077, "loss": 1.6864, "step": 2030 }, { "epoch": 0.07273443515318638, "grad_norm": 1.7847421169281006, "learning_rate": 0.00019904412774038487, "loss": 1.6732, "step": 2031 }, { "epoch": 0.07277024728275466, "grad_norm": 1.5283077955245972, "learning_rate": 0.000199042527163788, "loss": 1.7374, "step": 2032 }, { "epoch": 0.07280605941232296, "grad_norm": 1.7698839902877808, "learning_rate": 0.0001990409252547017, "loss": 1.8254, "step": 2033 }, { "epoch": 0.07284187154189124, "grad_norm": 1.3195661306381226, "learning_rate": 0.0001990393220131475, "loss": 1.8756, "step": 2034 }, { "epoch": 0.07287768367145953, "grad_norm": 1.8096301555633545, "learning_rate": 0.00019903771743914696, "loss": 1.4076, "step": 2035 }, { "epoch": 0.07291349580102781, "grad_norm": 1.5130648612976074, "learning_rate": 0.00019903611153272168, "loss": 1.2862, "step": 2036 }, { "epoch": 0.07294930793059609, "grad_norm": 2.1466357707977295, "learning_rate": 0.0001990345042938933, "loss": 1.6471, "step": 2037 }, { "epoch": 0.07298512006016437, "grad_norm": 1.3187168836593628, "learning_rate": 0.00019903289572268336, "loss": 1.6238, "step": 2038 }, { "epoch": 0.07302093218973266, "grad_norm": 1.909706473350525, "learning_rate": 0.0001990312858191136, "loss": 1.6599, "step": 2039 }, { "epoch": 0.07305674431930095, "grad_norm": 1.4692286252975464, "learning_rate": 0.0001990296745832056, "loss": 1.7824, "step": 2040 }, { "epoch": 0.07309255644886924, "grad_norm": 1.2459222078323364, "learning_rate": 0.00019902806201498106, "loss": 1.7793, "step": 2041 }, { "epoch": 0.07312836857843752, "grad_norm": 1.560295581817627, "learning_rate": 0.0001990264481144617, "loss": 1.7668, "step": 2042 }, { "epoch": 0.0731641807080058, "grad_norm": 1.6913843154907227, "learning_rate": 0.00019902483288166922, "loss": 1.6637, "step": 2043 }, { "epoch": 0.07319999283757408, "grad_norm": 1.3807941675186157, "learning_rate": 0.00019902321631662533, "loss": 1.5641, "step": 2044 }, { "epoch": 0.07323580496714237, "grad_norm": 2.5262813568115234, "learning_rate": 0.0001990215984193518, "loss": 1.4331, "step": 2045 }, { "epoch": 0.07327161709671065, "grad_norm": 1.294885277748108, "learning_rate": 0.00019901997918987042, "loss": 1.7085, "step": 2046 }, { "epoch": 0.07330742922627895, "grad_norm": 1.254361867904663, "learning_rate": 0.0001990183586282029, "loss": 1.5558, "step": 2047 }, { "epoch": 0.07334324135584723, "grad_norm": 1.6477164030075073, "learning_rate": 0.00019901673673437112, "loss": 1.6353, "step": 2048 }, { "epoch": 0.07337905348541551, "grad_norm": 2.1215851306915283, "learning_rate": 0.00019901511350839686, "loss": 2.1082, "step": 2049 }, { "epoch": 0.0734148656149838, "grad_norm": 1.359099268913269, "learning_rate": 0.00019901348895030196, "loss": 1.8811, "step": 2050 }, { "epoch": 0.07345067774455208, "grad_norm": 1.2427027225494385, "learning_rate": 0.0001990118630601083, "loss": 1.7966, "step": 2051 }, { "epoch": 0.07348648987412036, "grad_norm": 2.6582934856414795, "learning_rate": 0.00019901023583783776, "loss": 1.7436, "step": 2052 }, { "epoch": 0.07352230200368864, "grad_norm": 2.8324716091156006, "learning_rate": 0.00019900860728351216, "loss": 1.4201, "step": 2053 }, { "epoch": 0.07355811413325694, "grad_norm": 1.3952288627624512, "learning_rate": 0.00019900697739715347, "loss": 1.7557, "step": 2054 }, { "epoch": 0.07359392626282522, "grad_norm": 1.437296748161316, "learning_rate": 0.00019900534617878365, "loss": 1.7427, "step": 2055 }, { "epoch": 0.0736297383923935, "grad_norm": 2.397494077682495, "learning_rate": 0.0001990037136284246, "loss": 1.6845, "step": 2056 }, { "epoch": 0.07366555052196179, "grad_norm": 1.7484171390533447, "learning_rate": 0.00019900207974609822, "loss": 1.5456, "step": 2057 }, { "epoch": 0.07370136265153007, "grad_norm": 1.8446656465530396, "learning_rate": 0.00019900044453182662, "loss": 1.7223, "step": 2058 }, { "epoch": 0.07373717478109836, "grad_norm": 1.7288898229599, "learning_rate": 0.00019899880798563172, "loss": 1.6006, "step": 2059 }, { "epoch": 0.07377298691066664, "grad_norm": 2.0306966304779053, "learning_rate": 0.00019899717010753558, "loss": 1.6236, "step": 2060 }, { "epoch": 0.07380879904023492, "grad_norm": 1.7749613523483276, "learning_rate": 0.0001989955308975602, "loss": 1.7825, "step": 2061 }, { "epoch": 0.07384461116980322, "grad_norm": 1.8691487312316895, "learning_rate": 0.00019899389035572763, "loss": 1.8369, "step": 2062 }, { "epoch": 0.0738804232993715, "grad_norm": 1.8923448324203491, "learning_rate": 0.00019899224848205998, "loss": 1.7227, "step": 2063 }, { "epoch": 0.07391623542893978, "grad_norm": 1.278397798538208, "learning_rate": 0.0001989906052765793, "loss": 1.5775, "step": 2064 }, { "epoch": 0.07395204755850807, "grad_norm": 1.4953033924102783, "learning_rate": 0.00019898896073930776, "loss": 1.7734, "step": 2065 }, { "epoch": 0.07398785968807635, "grad_norm": 1.5151599645614624, "learning_rate": 0.00019898731487026742, "loss": 1.6955, "step": 2066 }, { "epoch": 0.07402367181764463, "grad_norm": 1.816370964050293, "learning_rate": 0.00019898566766948038, "loss": 1.5642, "step": 2067 }, { "epoch": 0.07405948394721291, "grad_norm": 1.5039783716201782, "learning_rate": 0.00019898401913696892, "loss": 1.492, "step": 2068 }, { "epoch": 0.07409529607678121, "grad_norm": 1.5567251443862915, "learning_rate": 0.00019898236927275517, "loss": 1.8026, "step": 2069 }, { "epoch": 0.0741311082063495, "grad_norm": 2.611471176147461, "learning_rate": 0.0001989807180768613, "loss": 1.8471, "step": 2070 }, { "epoch": 0.07416692033591778, "grad_norm": 1.6151888370513916, "learning_rate": 0.00019897906554930956, "loss": 1.6592, "step": 2071 }, { "epoch": 0.07420273246548606, "grad_norm": 2.0822031497955322, "learning_rate": 0.00019897741169012213, "loss": 1.8254, "step": 2072 }, { "epoch": 0.07423854459505434, "grad_norm": 1.3963028192520142, "learning_rate": 0.00019897575649932135, "loss": 1.6788, "step": 2073 }, { "epoch": 0.07427435672462263, "grad_norm": 1.506298542022705, "learning_rate": 0.0001989740999769294, "loss": 1.6061, "step": 2074 }, { "epoch": 0.07431016885419091, "grad_norm": 1.5757018327713013, "learning_rate": 0.0001989724421229686, "loss": 1.8792, "step": 2075 }, { "epoch": 0.0743459809837592, "grad_norm": 1.4515763521194458, "learning_rate": 0.00019897078293746128, "loss": 1.7142, "step": 2076 }, { "epoch": 0.07438179311332749, "grad_norm": 2.8991751670837402, "learning_rate": 0.0001989691224204297, "loss": 1.757, "step": 2077 }, { "epoch": 0.07441760524289577, "grad_norm": 1.243881344795227, "learning_rate": 0.0001989674605718963, "loss": 1.6967, "step": 2078 }, { "epoch": 0.07445341737246405, "grad_norm": 2.1475493907928467, "learning_rate": 0.00019896579739188335, "loss": 1.7954, "step": 2079 }, { "epoch": 0.07448922950203234, "grad_norm": 1.5451580286026, "learning_rate": 0.00019896413288041323, "loss": 1.6241, "step": 2080 }, { "epoch": 0.07452504163160062, "grad_norm": 1.7768465280532837, "learning_rate": 0.00019896246703750837, "loss": 1.6177, "step": 2081 }, { "epoch": 0.0745608537611689, "grad_norm": 1.5759029388427734, "learning_rate": 0.00019896079986319118, "loss": 1.8915, "step": 2082 }, { "epoch": 0.0745966658907372, "grad_norm": 1.9887797832489014, "learning_rate": 0.00019895913135748407, "loss": 1.6981, "step": 2083 }, { "epoch": 0.07463247802030548, "grad_norm": 1.390030860900879, "learning_rate": 0.0001989574615204095, "loss": 1.5878, "step": 2084 }, { "epoch": 0.07466829014987376, "grad_norm": 1.6922458410263062, "learning_rate": 0.0001989557903519899, "loss": 1.4414, "step": 2085 }, { "epoch": 0.07470410227944205, "grad_norm": 2.327239990234375, "learning_rate": 0.0001989541178522478, "loss": 1.753, "step": 2086 }, { "epoch": 0.07473991440901033, "grad_norm": 1.5192580223083496, "learning_rate": 0.0001989524440212057, "loss": 1.5827, "step": 2087 }, { "epoch": 0.07477572653857861, "grad_norm": 1.4516956806182861, "learning_rate": 0.00019895076885888613, "loss": 1.4345, "step": 2088 }, { "epoch": 0.0748115386681469, "grad_norm": 1.7795008420944214, "learning_rate": 0.00019894909236531158, "loss": 1.3601, "step": 2089 }, { "epoch": 0.07484735079771519, "grad_norm": 1.4862284660339355, "learning_rate": 0.0001989474145405046, "loss": 1.6291, "step": 2090 }, { "epoch": 0.07488316292728348, "grad_norm": 1.9060381650924683, "learning_rate": 0.00019894573538448783, "loss": 1.9217, "step": 2091 }, { "epoch": 0.07491897505685176, "grad_norm": 1.4660017490386963, "learning_rate": 0.0001989440548972838, "loss": 1.5406, "step": 2092 }, { "epoch": 0.07495478718642004, "grad_norm": 1.3798494338989258, "learning_rate": 0.0001989423730789151, "loss": 1.4285, "step": 2093 }, { "epoch": 0.07499059931598832, "grad_norm": 1.282052755355835, "learning_rate": 0.00019894068992940448, "loss": 1.7026, "step": 2094 }, { "epoch": 0.0750264114455566, "grad_norm": 3.1386232376098633, "learning_rate": 0.00019893900544877443, "loss": 1.7159, "step": 2095 }, { "epoch": 0.07506222357512489, "grad_norm": 1.8487104177474976, "learning_rate": 0.00019893731963704773, "loss": 1.4776, "step": 2096 }, { "epoch": 0.07509803570469319, "grad_norm": 1.7013107538223267, "learning_rate": 0.000198935632494247, "loss": 1.9181, "step": 2097 }, { "epoch": 0.07513384783426147, "grad_norm": 1.594666600227356, "learning_rate": 0.00019893394402039496, "loss": 1.7788, "step": 2098 }, { "epoch": 0.07516965996382975, "grad_norm": 1.4459965229034424, "learning_rate": 0.00019893225421551428, "loss": 1.6597, "step": 2099 }, { "epoch": 0.07520547209339803, "grad_norm": 2.2312240600585938, "learning_rate": 0.0001989305630796278, "loss": 1.5934, "step": 2100 }, { "epoch": 0.07524128422296632, "grad_norm": 1.4842166900634766, "learning_rate": 0.00019892887061275815, "loss": 1.7209, "step": 2101 }, { "epoch": 0.0752770963525346, "grad_norm": 2.3655948638916016, "learning_rate": 0.00019892717681492815, "loss": 1.5201, "step": 2102 }, { "epoch": 0.07531290848210288, "grad_norm": 1.5361484289169312, "learning_rate": 0.00019892548168616063, "loss": 1.6127, "step": 2103 }, { "epoch": 0.07534872061167118, "grad_norm": 2.2546679973602295, "learning_rate": 0.00019892378522647834, "loss": 1.3852, "step": 2104 }, { "epoch": 0.07538453274123946, "grad_norm": 1.4017812013626099, "learning_rate": 0.00019892208743590412, "loss": 1.4618, "step": 2105 }, { "epoch": 0.07542034487080775, "grad_norm": 1.399861216545105, "learning_rate": 0.00019892038831446085, "loss": 1.2668, "step": 2106 }, { "epoch": 0.07545615700037603, "grad_norm": 1.8196576833724976, "learning_rate": 0.0001989186878621713, "loss": 1.7222, "step": 2107 }, { "epoch": 0.07549196912994431, "grad_norm": 1.6640617847442627, "learning_rate": 0.00019891698607905843, "loss": 1.5111, "step": 2108 }, { "epoch": 0.0755277812595126, "grad_norm": 1.4460179805755615, "learning_rate": 0.0001989152829651451, "loss": 1.7697, "step": 2109 }, { "epoch": 0.07556359338908088, "grad_norm": 2.077554941177368, "learning_rate": 0.00019891357852045422, "loss": 1.5551, "step": 2110 }, { "epoch": 0.07559940551864916, "grad_norm": 2.658271312713623, "learning_rate": 0.00019891187274500874, "loss": 1.9367, "step": 2111 }, { "epoch": 0.07563521764821746, "grad_norm": 1.6757729053497314, "learning_rate": 0.0001989101656388316, "loss": 1.5789, "step": 2112 }, { "epoch": 0.07567102977778574, "grad_norm": 1.1914464235305786, "learning_rate": 0.00019890845720194576, "loss": 1.6118, "step": 2113 }, { "epoch": 0.07570684190735402, "grad_norm": 1.547780990600586, "learning_rate": 0.00019890674743437424, "loss": 1.8614, "step": 2114 }, { "epoch": 0.0757426540369223, "grad_norm": 2.2313425540924072, "learning_rate": 0.00019890503633614, "loss": 1.5434, "step": 2115 }, { "epoch": 0.07577846616649059, "grad_norm": 1.3298251628875732, "learning_rate": 0.00019890332390726606, "loss": 1.6958, "step": 2116 }, { "epoch": 0.07581427829605887, "grad_norm": 1.4307975769042969, "learning_rate": 0.00019890161014777546, "loss": 1.4825, "step": 2117 }, { "epoch": 0.07585009042562715, "grad_norm": 3.3594889640808105, "learning_rate": 0.0001988998950576913, "loss": 1.695, "step": 2118 }, { "epoch": 0.07588590255519545, "grad_norm": 1.8578377962112427, "learning_rate": 0.00019889817863703662, "loss": 1.7816, "step": 2119 }, { "epoch": 0.07592171468476373, "grad_norm": 1.5581252574920654, "learning_rate": 0.0001988964608858345, "loss": 1.889, "step": 2120 }, { "epoch": 0.07595752681433202, "grad_norm": 1.5691393613815308, "learning_rate": 0.00019889474180410805, "loss": 1.5541, "step": 2121 }, { "epoch": 0.0759933389439003, "grad_norm": 1.5680208206176758, "learning_rate": 0.00019889302139188044, "loss": 1.5807, "step": 2122 }, { "epoch": 0.07602915107346858, "grad_norm": 1.4914289712905884, "learning_rate": 0.00019889129964917478, "loss": 1.8856, "step": 2123 }, { "epoch": 0.07606496320303686, "grad_norm": 1.8300025463104248, "learning_rate": 0.00019888957657601425, "loss": 1.6327, "step": 2124 }, { "epoch": 0.07610077533260515, "grad_norm": 1.6856653690338135, "learning_rate": 0.00019888785217242206, "loss": 1.5229, "step": 2125 }, { "epoch": 0.07613658746217344, "grad_norm": 1.4205312728881836, "learning_rate": 0.00019888612643842132, "loss": 1.5932, "step": 2126 }, { "epoch": 0.07617239959174173, "grad_norm": 1.4903684854507446, "learning_rate": 0.00019888439937403534, "loss": 1.7948, "step": 2127 }, { "epoch": 0.07620821172131001, "grad_norm": 1.770424485206604, "learning_rate": 0.0001988826709792873, "loss": 1.8025, "step": 2128 }, { "epoch": 0.07624402385087829, "grad_norm": 1.9164286851882935, "learning_rate": 0.00019888094125420044, "loss": 1.7622, "step": 2129 }, { "epoch": 0.07627983598044658, "grad_norm": 2.1902430057525635, "learning_rate": 0.00019887921019879812, "loss": 1.94, "step": 2130 }, { "epoch": 0.07631564811001486, "grad_norm": 1.942354679107666, "learning_rate": 0.00019887747781310356, "loss": 1.6829, "step": 2131 }, { "epoch": 0.07635146023958314, "grad_norm": 2.437299966812134, "learning_rate": 0.00019887574409714005, "loss": 1.8157, "step": 2132 }, { "epoch": 0.07638727236915144, "grad_norm": 1.7092030048370361, "learning_rate": 0.00019887400905093096, "loss": 1.8297, "step": 2133 }, { "epoch": 0.07642308449871972, "grad_norm": 1.8825984001159668, "learning_rate": 0.00019887227267449963, "loss": 1.739, "step": 2134 }, { "epoch": 0.076458896628288, "grad_norm": 1.9578901529312134, "learning_rate": 0.00019887053496786937, "loss": 1.772, "step": 2135 }, { "epoch": 0.07649470875785629, "grad_norm": 0.9991260170936584, "learning_rate": 0.00019886879593106365, "loss": 1.3756, "step": 2136 }, { "epoch": 0.07653052088742457, "grad_norm": 1.4741476774215698, "learning_rate": 0.00019886705556410576, "loss": 1.4068, "step": 2137 }, { "epoch": 0.07656633301699285, "grad_norm": 1.7536135911941528, "learning_rate": 0.0001988653138670192, "loss": 1.7613, "step": 2138 }, { "epoch": 0.07660214514656113, "grad_norm": 1.7891243696212769, "learning_rate": 0.00019886357083982734, "loss": 1.5153, "step": 2139 }, { "epoch": 0.07663795727612943, "grad_norm": 1.2617411613464355, "learning_rate": 0.0001988618264825537, "loss": 1.597, "step": 2140 }, { "epoch": 0.07667376940569771, "grad_norm": 1.4487390518188477, "learning_rate": 0.00019886008079522167, "loss": 1.6301, "step": 2141 }, { "epoch": 0.076709581535266, "grad_norm": 1.4209492206573486, "learning_rate": 0.0001988583337778548, "loss": 1.6636, "step": 2142 }, { "epoch": 0.07674539366483428, "grad_norm": 1.6687378883361816, "learning_rate": 0.00019885658543047655, "loss": 1.7059, "step": 2143 }, { "epoch": 0.07678120579440256, "grad_norm": 1.7952252626419067, "learning_rate": 0.00019885483575311045, "loss": 2.0421, "step": 2144 }, { "epoch": 0.07681701792397085, "grad_norm": 1.4075217247009277, "learning_rate": 0.00019885308474578008, "loss": 1.5278, "step": 2145 }, { "epoch": 0.07685283005353913, "grad_norm": 1.9520444869995117, "learning_rate": 0.00019885133240850892, "loss": 1.9848, "step": 2146 }, { "epoch": 0.07688864218310743, "grad_norm": 2.3137176036834717, "learning_rate": 0.00019884957874132065, "loss": 2.0316, "step": 2147 }, { "epoch": 0.07692445431267571, "grad_norm": 1.3930728435516357, "learning_rate": 0.00019884782374423877, "loss": 1.6199, "step": 2148 }, { "epoch": 0.07696026644224399, "grad_norm": 1.7720856666564941, "learning_rate": 0.00019884606741728692, "loss": 1.4512, "step": 2149 }, { "epoch": 0.07699607857181227, "grad_norm": 1.3794426918029785, "learning_rate": 0.00019884430976048877, "loss": 1.8775, "step": 2150 }, { "epoch": 0.07703189070138056, "grad_norm": 1.3538085222244263, "learning_rate": 0.00019884255077386788, "loss": 1.4677, "step": 2151 }, { "epoch": 0.07706770283094884, "grad_norm": 1.767635703086853, "learning_rate": 0.000198840790457448, "loss": 1.4348, "step": 2152 }, { "epoch": 0.07710351496051712, "grad_norm": 1.4579626321792603, "learning_rate": 0.00019883902881125278, "loss": 1.756, "step": 2153 }, { "epoch": 0.0771393270900854, "grad_norm": 1.3255012035369873, "learning_rate": 0.00019883726583530594, "loss": 1.6512, "step": 2154 }, { "epoch": 0.0771751392196537, "grad_norm": 1.5879746675491333, "learning_rate": 0.00019883550152963113, "loss": 1.6947, "step": 2155 }, { "epoch": 0.07721095134922198, "grad_norm": 1.8822317123413086, "learning_rate": 0.00019883373589425215, "loss": 1.702, "step": 2156 }, { "epoch": 0.07724676347879027, "grad_norm": 2.246804714202881, "learning_rate": 0.00019883196892919275, "loss": 1.6123, "step": 2157 }, { "epoch": 0.07728257560835855, "grad_norm": 1.7975350618362427, "learning_rate": 0.00019883020063447672, "loss": 1.6843, "step": 2158 }, { "epoch": 0.07731838773792683, "grad_norm": 1.6588022708892822, "learning_rate": 0.00019882843101012778, "loss": 1.7215, "step": 2159 }, { "epoch": 0.07735419986749512, "grad_norm": 1.990808367729187, "learning_rate": 0.00019882666005616978, "loss": 1.729, "step": 2160 }, { "epoch": 0.0773900119970634, "grad_norm": 2.0240907669067383, "learning_rate": 0.00019882488777262655, "loss": 1.299, "step": 2161 }, { "epoch": 0.0774258241266317, "grad_norm": 1.273319959640503, "learning_rate": 0.00019882311415952194, "loss": 1.2768, "step": 2162 }, { "epoch": 0.07746163625619998, "grad_norm": 3.0127789974212646, "learning_rate": 0.00019882133921687983, "loss": 1.7046, "step": 2163 }, { "epoch": 0.07749744838576826, "grad_norm": 1.886521577835083, "learning_rate": 0.00019881956294472405, "loss": 1.62, "step": 2164 }, { "epoch": 0.07753326051533654, "grad_norm": 1.6154439449310303, "learning_rate": 0.00019881778534307852, "loss": 1.5143, "step": 2165 }, { "epoch": 0.07756907264490483, "grad_norm": 2.255967617034912, "learning_rate": 0.0001988160064119671, "loss": 1.6454, "step": 2166 }, { "epoch": 0.07760488477447311, "grad_norm": 2.1320412158966064, "learning_rate": 0.00019881422615141385, "loss": 1.6159, "step": 2167 }, { "epoch": 0.07764069690404139, "grad_norm": 1.293569803237915, "learning_rate": 0.00019881244456144262, "loss": 1.7265, "step": 2168 }, { "epoch": 0.07767650903360969, "grad_norm": 1.4700331687927246, "learning_rate": 0.00019881066164207742, "loss": 1.6755, "step": 2169 }, { "epoch": 0.07771232116317797, "grad_norm": 2.1993863582611084, "learning_rate": 0.0001988088773933422, "loss": 1.7127, "step": 2170 }, { "epoch": 0.07774813329274625, "grad_norm": 2.8095591068267822, "learning_rate": 0.000198807091815261, "loss": 1.6606, "step": 2171 }, { "epoch": 0.07778394542231454, "grad_norm": 1.5221842527389526, "learning_rate": 0.00019880530490785784, "loss": 1.543, "step": 2172 }, { "epoch": 0.07781975755188282, "grad_norm": 1.5565546751022339, "learning_rate": 0.00019880351667115673, "loss": 1.3438, "step": 2173 }, { "epoch": 0.0778555696814511, "grad_norm": 1.7649588584899902, "learning_rate": 0.00019880172710518178, "loss": 1.7558, "step": 2174 }, { "epoch": 0.07789138181101939, "grad_norm": 2.050035238265991, "learning_rate": 0.00019879993620995702, "loss": 1.6594, "step": 2175 }, { "epoch": 0.07792719394058768, "grad_norm": 1.595670461654663, "learning_rate": 0.00019879814398550657, "loss": 2.0138, "step": 2176 }, { "epoch": 0.07796300607015597, "grad_norm": 1.8371297121047974, "learning_rate": 0.00019879635043185454, "loss": 1.6345, "step": 2177 }, { "epoch": 0.07799881819972425, "grad_norm": 1.5137767791748047, "learning_rate": 0.00019879455554902502, "loss": 1.8678, "step": 2178 }, { "epoch": 0.07803463032929253, "grad_norm": 1.4583414793014526, "learning_rate": 0.00019879275933704224, "loss": 1.7769, "step": 2179 }, { "epoch": 0.07807044245886081, "grad_norm": 1.8222814798355103, "learning_rate": 0.00019879096179593027, "loss": 1.7373, "step": 2180 }, { "epoch": 0.0781062545884291, "grad_norm": 1.8903967142105103, "learning_rate": 0.00019878916292571334, "loss": 2.003, "step": 2181 }, { "epoch": 0.07814206671799738, "grad_norm": 2.2535934448242188, "learning_rate": 0.00019878736272641568, "loss": 1.661, "step": 2182 }, { "epoch": 0.07817787884756568, "grad_norm": 1.6508339643478394, "learning_rate": 0.00019878556119806148, "loss": 1.3687, "step": 2183 }, { "epoch": 0.07821369097713396, "grad_norm": 1.6056736707687378, "learning_rate": 0.00019878375834067496, "loss": 1.3639, "step": 2184 }, { "epoch": 0.07824950310670224, "grad_norm": 1.481017827987671, "learning_rate": 0.0001987819541542804, "loss": 1.9444, "step": 2185 }, { "epoch": 0.07828531523627053, "grad_norm": 1.7288168668746948, "learning_rate": 0.0001987801486389021, "loss": 1.9123, "step": 2186 }, { "epoch": 0.07832112736583881, "grad_norm": 2.189270257949829, "learning_rate": 0.00019877834179456424, "loss": 1.5186, "step": 2187 }, { "epoch": 0.07835693949540709, "grad_norm": 1.4086220264434814, "learning_rate": 0.00019877653362129126, "loss": 1.5263, "step": 2188 }, { "epoch": 0.07839275162497537, "grad_norm": 1.423419713973999, "learning_rate": 0.00019877472411910745, "loss": 1.6879, "step": 2189 }, { "epoch": 0.07842856375454367, "grad_norm": 1.8120468854904175, "learning_rate": 0.0001987729132880371, "loss": 1.725, "step": 2190 }, { "epoch": 0.07846437588411195, "grad_norm": 1.2922435998916626, "learning_rate": 0.00019877110112810463, "loss": 1.5935, "step": 2191 }, { "epoch": 0.07850018801368024, "grad_norm": 1.484522819519043, "learning_rate": 0.00019876928763933437, "loss": 1.5979, "step": 2192 }, { "epoch": 0.07853600014324852, "grad_norm": 1.5501383543014526, "learning_rate": 0.00019876747282175078, "loss": 1.6291, "step": 2193 }, { "epoch": 0.0785718122728168, "grad_norm": 1.7386257648468018, "learning_rate": 0.00019876565667537824, "loss": 1.5312, "step": 2194 }, { "epoch": 0.07860762440238508, "grad_norm": 1.6912481784820557, "learning_rate": 0.00019876383920024117, "loss": 1.5414, "step": 2195 }, { "epoch": 0.07864343653195337, "grad_norm": 1.5818910598754883, "learning_rate": 0.00019876202039636405, "loss": 1.8032, "step": 2196 }, { "epoch": 0.07867924866152166, "grad_norm": 1.9245262145996094, "learning_rate": 0.00019876020026377136, "loss": 1.3893, "step": 2197 }, { "epoch": 0.07871506079108995, "grad_norm": 1.422369360923767, "learning_rate": 0.00019875837880248756, "loss": 1.6805, "step": 2198 }, { "epoch": 0.07875087292065823, "grad_norm": 1.8367788791656494, "learning_rate": 0.00019875655601253714, "loss": 1.7323, "step": 2199 }, { "epoch": 0.07878668505022651, "grad_norm": 1.3831416368484497, "learning_rate": 0.00019875473189394463, "loss": 1.5551, "step": 2200 }, { "epoch": 0.0788224971797948, "grad_norm": 1.5825157165527344, "learning_rate": 0.00019875290644673463, "loss": 1.7588, "step": 2201 }, { "epoch": 0.07885830930936308, "grad_norm": 1.0356022119522095, "learning_rate": 0.00019875107967093163, "loss": 1.7062, "step": 2202 }, { "epoch": 0.07889412143893136, "grad_norm": 1.5128309726715088, "learning_rate": 0.00019874925156656024, "loss": 1.6296, "step": 2203 }, { "epoch": 0.07892993356849964, "grad_norm": 2.1393330097198486, "learning_rate": 0.00019874742213364506, "loss": 1.8722, "step": 2204 }, { "epoch": 0.07896574569806794, "grad_norm": 1.531294345855713, "learning_rate": 0.00019874559137221068, "loss": 1.5297, "step": 2205 }, { "epoch": 0.07900155782763622, "grad_norm": 1.653439998626709, "learning_rate": 0.00019874375928228175, "loss": 1.5084, "step": 2206 }, { "epoch": 0.0790373699572045, "grad_norm": 1.6008787155151367, "learning_rate": 0.00019874192586388288, "loss": 1.4493, "step": 2207 }, { "epoch": 0.07907318208677279, "grad_norm": 1.8827781677246094, "learning_rate": 0.00019874009111703878, "loss": 1.6156, "step": 2208 }, { "epoch": 0.07910899421634107, "grad_norm": 1.7337536811828613, "learning_rate": 0.00019873825504177414, "loss": 1.7076, "step": 2209 }, { "epoch": 0.07914480634590935, "grad_norm": 1.7455438375473022, "learning_rate": 0.0001987364176381136, "loss": 1.4984, "step": 2210 }, { "epoch": 0.07918061847547764, "grad_norm": 1.7682832479476929, "learning_rate": 0.00019873457890608198, "loss": 1.7267, "step": 2211 }, { "epoch": 0.07921643060504593, "grad_norm": 1.8477669954299927, "learning_rate": 0.0001987327388457039, "loss": 1.8183, "step": 2212 }, { "epoch": 0.07925224273461422, "grad_norm": 2.0341742038726807, "learning_rate": 0.0001987308974570042, "loss": 1.5857, "step": 2213 }, { "epoch": 0.0792880548641825, "grad_norm": 1.6047203540802002, "learning_rate": 0.0001987290547400076, "loss": 1.4304, "step": 2214 }, { "epoch": 0.07932386699375078, "grad_norm": 1.6200206279754639, "learning_rate": 0.000198727210694739, "loss": 1.3838, "step": 2215 }, { "epoch": 0.07935967912331907, "grad_norm": 1.4143503904342651, "learning_rate": 0.00019872536532122305, "loss": 1.4999, "step": 2216 }, { "epoch": 0.07939549125288735, "grad_norm": 1.7010365724563599, "learning_rate": 0.0001987235186194847, "loss": 1.9153, "step": 2217 }, { "epoch": 0.07943130338245563, "grad_norm": 1.9490126371383667, "learning_rate": 0.00019872167058954874, "loss": 1.5034, "step": 2218 }, { "epoch": 0.07946711551202393, "grad_norm": 1.496788740158081, "learning_rate": 0.00019871982123144004, "loss": 1.7765, "step": 2219 }, { "epoch": 0.07950292764159221, "grad_norm": 1.3413259983062744, "learning_rate": 0.00019871797054518347, "loss": 1.6507, "step": 2220 }, { "epoch": 0.0795387397711605, "grad_norm": 2.239360809326172, "learning_rate": 0.00019871611853080397, "loss": 1.8348, "step": 2221 }, { "epoch": 0.07957455190072878, "grad_norm": 1.3430578708648682, "learning_rate": 0.00019871426518832644, "loss": 1.7134, "step": 2222 }, { "epoch": 0.07961036403029706, "grad_norm": 1.7762681245803833, "learning_rate": 0.00019871241051777576, "loss": 1.8258, "step": 2223 }, { "epoch": 0.07964617615986534, "grad_norm": 3.437670946121216, "learning_rate": 0.00019871055451917694, "loss": 1.598, "step": 2224 }, { "epoch": 0.07968198828943363, "grad_norm": 1.7100651264190674, "learning_rate": 0.00019870869719255496, "loss": 1.5688, "step": 2225 }, { "epoch": 0.07971780041900192, "grad_norm": 1.7335412502288818, "learning_rate": 0.00019870683853793474, "loss": 1.4256, "step": 2226 }, { "epoch": 0.0797536125485702, "grad_norm": 1.4160618782043457, "learning_rate": 0.00019870497855534137, "loss": 1.6183, "step": 2227 }, { "epoch": 0.07978942467813849, "grad_norm": 1.4244014024734497, "learning_rate": 0.00019870311724479983, "loss": 1.7856, "step": 2228 }, { "epoch": 0.07982523680770677, "grad_norm": 1.4283264875411987, "learning_rate": 0.00019870125460633514, "loss": 1.4689, "step": 2229 }, { "epoch": 0.07986104893727505, "grad_norm": 1.4369860887527466, "learning_rate": 0.00019869939063997243, "loss": 1.6801, "step": 2230 }, { "epoch": 0.07989686106684334, "grad_norm": 1.8066489696502686, "learning_rate": 0.00019869752534573668, "loss": 1.4535, "step": 2231 }, { "epoch": 0.07993267319641162, "grad_norm": 2.8576557636260986, "learning_rate": 0.00019869565872365308, "loss": 1.7293, "step": 2232 }, { "epoch": 0.07996848532597992, "grad_norm": 1.3145085573196411, "learning_rate": 0.00019869379077374667, "loss": 1.7171, "step": 2233 }, { "epoch": 0.0800042974555482, "grad_norm": 2.2998170852661133, "learning_rate": 0.00019869192149604264, "loss": 1.6707, "step": 2234 }, { "epoch": 0.08004010958511648, "grad_norm": 1.755245566368103, "learning_rate": 0.0001986900508905661, "loss": 1.5189, "step": 2235 }, { "epoch": 0.08007592171468476, "grad_norm": 1.8190783262252808, "learning_rate": 0.00019868817895734222, "loss": 1.7181, "step": 2236 }, { "epoch": 0.08011173384425305, "grad_norm": 1.505210280418396, "learning_rate": 0.00019868630569639618, "loss": 1.5393, "step": 2237 }, { "epoch": 0.08014754597382133, "grad_norm": 2.068084478378296, "learning_rate": 0.0001986844311077532, "loss": 1.5744, "step": 2238 }, { "epoch": 0.08018335810338961, "grad_norm": 1.9969322681427002, "learning_rate": 0.0001986825551914385, "loss": 1.507, "step": 2239 }, { "epoch": 0.08021917023295791, "grad_norm": 3.0546181201934814, "learning_rate": 0.00019868067794747728, "loss": 1.8212, "step": 2240 }, { "epoch": 0.08025498236252619, "grad_norm": 1.3269965648651123, "learning_rate": 0.00019867879937589486, "loss": 1.6769, "step": 2241 }, { "epoch": 0.08029079449209447, "grad_norm": 1.6832436323165894, "learning_rate": 0.0001986769194767165, "loss": 1.7213, "step": 2242 }, { "epoch": 0.08032660662166276, "grad_norm": 1.4924203157424927, "learning_rate": 0.00019867503824996745, "loss": 1.6567, "step": 2243 }, { "epoch": 0.08036241875123104, "grad_norm": 1.7548980712890625, "learning_rate": 0.00019867315569567303, "loss": 1.571, "step": 2244 }, { "epoch": 0.08039823088079932, "grad_norm": 2.1904239654541016, "learning_rate": 0.0001986712718138586, "loss": 1.6707, "step": 2245 }, { "epoch": 0.0804340430103676, "grad_norm": 1.3834635019302368, "learning_rate": 0.00019866938660454949, "loss": 1.5114, "step": 2246 }, { "epoch": 0.0804698551399359, "grad_norm": 1.6187025308609009, "learning_rate": 0.00019866750006777102, "loss": 1.6336, "step": 2247 }, { "epoch": 0.08050566726950419, "grad_norm": 1.5296133756637573, "learning_rate": 0.00019866561220354862, "loss": 1.6381, "step": 2248 }, { "epoch": 0.08054147939907247, "grad_norm": 1.5018010139465332, "learning_rate": 0.0001986637230119077, "loss": 1.5588, "step": 2249 }, { "epoch": 0.08057729152864075, "grad_norm": 1.3861911296844482, "learning_rate": 0.00019866183249287364, "loss": 1.5929, "step": 2250 }, { "epoch": 0.08061310365820903, "grad_norm": 1.2317131757736206, "learning_rate": 0.00019865994064647188, "loss": 1.4665, "step": 2251 }, { "epoch": 0.08064891578777732, "grad_norm": 1.849687933921814, "learning_rate": 0.0001986580474727279, "loss": 1.5341, "step": 2252 }, { "epoch": 0.0806847279173456, "grad_norm": 1.3923594951629639, "learning_rate": 0.00019865615297166714, "loss": 1.7859, "step": 2253 }, { "epoch": 0.08072054004691388, "grad_norm": 1.4301036596298218, "learning_rate": 0.0001986542571433151, "loss": 1.8605, "step": 2254 }, { "epoch": 0.08075635217648218, "grad_norm": 1.6380246877670288, "learning_rate": 0.00019865235998769727, "loss": 1.7913, "step": 2255 }, { "epoch": 0.08079216430605046, "grad_norm": 2.1478631496429443, "learning_rate": 0.0001986504615048392, "loss": 1.5993, "step": 2256 }, { "epoch": 0.08082797643561875, "grad_norm": 1.3344511985778809, "learning_rate": 0.0001986485616947664, "loss": 1.718, "step": 2257 }, { "epoch": 0.08086378856518703, "grad_norm": 1.8746141195297241, "learning_rate": 0.00019864666055750452, "loss": 1.6871, "step": 2258 }, { "epoch": 0.08089960069475531, "grad_norm": 1.6850662231445312, "learning_rate": 0.000198644758093079, "loss": 1.8947, "step": 2259 }, { "epoch": 0.0809354128243236, "grad_norm": 1.7207536697387695, "learning_rate": 0.00019864285430151553, "loss": 1.6498, "step": 2260 }, { "epoch": 0.08097122495389188, "grad_norm": 1.387789011001587, "learning_rate": 0.00019864094918283968, "loss": 1.5868, "step": 2261 }, { "epoch": 0.08100703708346017, "grad_norm": 1.4519275426864624, "learning_rate": 0.0001986390427370771, "loss": 1.4775, "step": 2262 }, { "epoch": 0.08104284921302846, "grad_norm": 1.7079511880874634, "learning_rate": 0.00019863713496425347, "loss": 1.4262, "step": 2263 }, { "epoch": 0.08107866134259674, "grad_norm": 2.308537483215332, "learning_rate": 0.0001986352258643944, "loss": 1.5502, "step": 2264 }, { "epoch": 0.08111447347216502, "grad_norm": 1.9541075229644775, "learning_rate": 0.00019863331543752558, "loss": 1.6206, "step": 2265 }, { "epoch": 0.0811502856017333, "grad_norm": 1.6608632802963257, "learning_rate": 0.00019863140368367273, "loss": 1.5943, "step": 2266 }, { "epoch": 0.08118609773130159, "grad_norm": 1.6765403747558594, "learning_rate": 0.00019862949060286158, "loss": 1.852, "step": 2267 }, { "epoch": 0.08122190986086987, "grad_norm": 1.4967211484909058, "learning_rate": 0.00019862757619511784, "loss": 1.4903, "step": 2268 }, { "epoch": 0.08125772199043817, "grad_norm": 1.465468168258667, "learning_rate": 0.0001986256604604673, "loss": 1.5695, "step": 2269 }, { "epoch": 0.08129353412000645, "grad_norm": 1.5892409086227417, "learning_rate": 0.0001986237433989357, "loss": 1.6337, "step": 2270 }, { "epoch": 0.08132934624957473, "grad_norm": 1.3970975875854492, "learning_rate": 0.0001986218250105489, "loss": 1.6544, "step": 2271 }, { "epoch": 0.08136515837914302, "grad_norm": 2.048312187194824, "learning_rate": 0.0001986199052953326, "loss": 1.4003, "step": 2272 }, { "epoch": 0.0814009705087113, "grad_norm": 1.864230751991272, "learning_rate": 0.0001986179842533127, "loss": 1.4919, "step": 2273 }, { "epoch": 0.08143678263827958, "grad_norm": 1.4341579675674438, "learning_rate": 0.00019861606188451502, "loss": 1.5127, "step": 2274 }, { "epoch": 0.08147259476784786, "grad_norm": 1.5005865097045898, "learning_rate": 0.00019861413818896546, "loss": 1.6661, "step": 2275 }, { "epoch": 0.08150840689741616, "grad_norm": 1.1538370847702026, "learning_rate": 0.00019861221316668984, "loss": 1.4567, "step": 2276 }, { "epoch": 0.08154421902698444, "grad_norm": 1.613469123840332, "learning_rate": 0.0001986102868177141, "loss": 1.7542, "step": 2277 }, { "epoch": 0.08158003115655273, "grad_norm": 1.6482155323028564, "learning_rate": 0.0001986083591420642, "loss": 1.8616, "step": 2278 }, { "epoch": 0.08161584328612101, "grad_norm": 1.7545753717422485, "learning_rate": 0.00019860643013976597, "loss": 1.4438, "step": 2279 }, { "epoch": 0.08165165541568929, "grad_norm": 2.3131508827209473, "learning_rate": 0.00019860449981084545, "loss": 1.6012, "step": 2280 }, { "epoch": 0.08168746754525757, "grad_norm": 1.8130494356155396, "learning_rate": 0.00019860256815532854, "loss": 1.2831, "step": 2281 }, { "epoch": 0.08172327967482586, "grad_norm": 1.7892794609069824, "learning_rate": 0.0001986006351732413, "loss": 1.5343, "step": 2282 }, { "epoch": 0.08175909180439415, "grad_norm": 1.544555902481079, "learning_rate": 0.00019859870086460965, "loss": 1.708, "step": 2283 }, { "epoch": 0.08179490393396244, "grad_norm": 2.1215953826904297, "learning_rate": 0.0001985967652294597, "loss": 1.7509, "step": 2284 }, { "epoch": 0.08183071606353072, "grad_norm": 1.4176714420318604, "learning_rate": 0.00019859482826781744, "loss": 1.5661, "step": 2285 }, { "epoch": 0.081866528193099, "grad_norm": 1.7044744491577148, "learning_rate": 0.00019859288997970895, "loss": 1.7327, "step": 2286 }, { "epoch": 0.08190234032266729, "grad_norm": 1.9832544326782227, "learning_rate": 0.0001985909503651603, "loss": 1.4708, "step": 2287 }, { "epoch": 0.08193815245223557, "grad_norm": 1.8185745477676392, "learning_rate": 0.0001985890094241976, "loss": 1.7814, "step": 2288 }, { "epoch": 0.08197396458180385, "grad_norm": 1.557418942451477, "learning_rate": 0.0001985870671568469, "loss": 1.7562, "step": 2289 }, { "epoch": 0.08200977671137215, "grad_norm": 1.7640334367752075, "learning_rate": 0.00019858512356313445, "loss": 1.4121, "step": 2290 }, { "epoch": 0.08204558884094043, "grad_norm": 1.6936959028244019, "learning_rate": 0.00019858317864308628, "loss": 1.6866, "step": 2291 }, { "epoch": 0.08208140097050871, "grad_norm": 1.587742805480957, "learning_rate": 0.0001985812323967286, "loss": 1.6117, "step": 2292 }, { "epoch": 0.082117213100077, "grad_norm": 1.4579888582229614, "learning_rate": 0.00019857928482408763, "loss": 1.7537, "step": 2293 }, { "epoch": 0.08215302522964528, "grad_norm": 1.2377456426620483, "learning_rate": 0.00019857733592518954, "loss": 1.2892, "step": 2294 }, { "epoch": 0.08218883735921356, "grad_norm": 2.1315197944641113, "learning_rate": 0.00019857538570006053, "loss": 1.7453, "step": 2295 }, { "epoch": 0.08222464948878185, "grad_norm": 2.444917678833008, "learning_rate": 0.00019857343414872685, "loss": 1.8851, "step": 2296 }, { "epoch": 0.08226046161835014, "grad_norm": 2.752296209335327, "learning_rate": 0.0001985714812712148, "loss": 1.7393, "step": 2297 }, { "epoch": 0.08229627374791842, "grad_norm": 1.5612289905548096, "learning_rate": 0.0001985695270675506, "loss": 1.5019, "step": 2298 }, { "epoch": 0.08233208587748671, "grad_norm": 1.6323474645614624, "learning_rate": 0.00019856757153776058, "loss": 1.6256, "step": 2299 }, { "epoch": 0.08236789800705499, "grad_norm": 2.15275239944458, "learning_rate": 0.000198565614681871, "loss": 1.4934, "step": 2300 }, { "epoch": 0.08240371013662327, "grad_norm": 2.2830469608306885, "learning_rate": 0.0001985636564999082, "loss": 1.8445, "step": 2301 }, { "epoch": 0.08243952226619156, "grad_norm": 1.283078670501709, "learning_rate": 0.00019856169699189856, "loss": 1.6053, "step": 2302 }, { "epoch": 0.08247533439575984, "grad_norm": 2.0136337280273438, "learning_rate": 0.00019855973615786842, "loss": 1.7864, "step": 2303 }, { "epoch": 0.08251114652532812, "grad_norm": 1.7052009105682373, "learning_rate": 0.0001985577739978442, "loss": 1.2617, "step": 2304 }, { "epoch": 0.08254695865489642, "grad_norm": 1.6163235902786255, "learning_rate": 0.0001985558105118522, "loss": 1.575, "step": 2305 }, { "epoch": 0.0825827707844647, "grad_norm": 1.2028448581695557, "learning_rate": 0.00019855384569991892, "loss": 1.575, "step": 2306 }, { "epoch": 0.08261858291403298, "grad_norm": 1.1882579326629639, "learning_rate": 0.0001985518795620708, "loss": 1.7627, "step": 2307 }, { "epoch": 0.08265439504360127, "grad_norm": 1.8147140741348267, "learning_rate": 0.0001985499120983342, "loss": 1.8266, "step": 2308 }, { "epoch": 0.08269020717316955, "grad_norm": 1.386354684829712, "learning_rate": 0.00019854794330873568, "loss": 1.6571, "step": 2309 }, { "epoch": 0.08272601930273783, "grad_norm": 2.043935775756836, "learning_rate": 0.00019854597319330175, "loss": 1.6551, "step": 2310 }, { "epoch": 0.08276183143230612, "grad_norm": 1.749604344367981, "learning_rate": 0.00019854400175205883, "loss": 1.7481, "step": 2311 }, { "epoch": 0.08279764356187441, "grad_norm": 1.7542747259140015, "learning_rate": 0.00019854202898503346, "loss": 1.4622, "step": 2312 }, { "epoch": 0.0828334556914427, "grad_norm": 1.2417621612548828, "learning_rate": 0.00019854005489225224, "loss": 1.7303, "step": 2313 }, { "epoch": 0.08286926782101098, "grad_norm": 1.5347504615783691, "learning_rate": 0.00019853807947374166, "loss": 1.866, "step": 2314 }, { "epoch": 0.08290507995057926, "grad_norm": 1.5232512950897217, "learning_rate": 0.0001985361027295283, "loss": 1.5288, "step": 2315 }, { "epoch": 0.08294089208014754, "grad_norm": 1.9294697046279907, "learning_rate": 0.00019853412465963883, "loss": 1.5636, "step": 2316 }, { "epoch": 0.08297670420971583, "grad_norm": 1.68217933177948, "learning_rate": 0.0001985321452640998, "loss": 1.547, "step": 2317 }, { "epoch": 0.08301251633928411, "grad_norm": 2.1691644191741943, "learning_rate": 0.00019853016454293785, "loss": 1.6431, "step": 2318 }, { "epoch": 0.0830483284688524, "grad_norm": 1.4858300685882568, "learning_rate": 0.00019852818249617963, "loss": 1.7581, "step": 2319 }, { "epoch": 0.08308414059842069, "grad_norm": 1.6669294834136963, "learning_rate": 0.0001985261991238518, "loss": 1.4139, "step": 2320 }, { "epoch": 0.08311995272798897, "grad_norm": 1.9712706804275513, "learning_rate": 0.00019852421442598107, "loss": 1.7628, "step": 2321 }, { "epoch": 0.08315576485755725, "grad_norm": 2.3515987396240234, "learning_rate": 0.0001985222284025941, "loss": 1.9166, "step": 2322 }, { "epoch": 0.08319157698712554, "grad_norm": 1.5739303827285767, "learning_rate": 0.00019852024105371764, "loss": 1.7737, "step": 2323 }, { "epoch": 0.08322738911669382, "grad_norm": 1.4964985847473145, "learning_rate": 0.0001985182523793784, "loss": 1.3749, "step": 2324 }, { "epoch": 0.0832632012462621, "grad_norm": 1.9431986808776855, "learning_rate": 0.00019851626237960316, "loss": 1.7314, "step": 2325 }, { "epoch": 0.0832990133758304, "grad_norm": 1.4260855913162231, "learning_rate": 0.00019851427105441874, "loss": 1.6923, "step": 2326 }, { "epoch": 0.08333482550539868, "grad_norm": 2.3210411071777344, "learning_rate": 0.00019851227840385184, "loss": 1.6412, "step": 2327 }, { "epoch": 0.08337063763496697, "grad_norm": 1.6222409009933472, "learning_rate": 0.00019851028442792928, "loss": 1.7586, "step": 2328 }, { "epoch": 0.08340644976453525, "grad_norm": 1.9236634969711304, "learning_rate": 0.00019850828912667794, "loss": 1.8013, "step": 2329 }, { "epoch": 0.08344226189410353, "grad_norm": 1.6394051313400269, "learning_rate": 0.0001985062925001246, "loss": 1.5605, "step": 2330 }, { "epoch": 0.08347807402367181, "grad_norm": 1.769962191581726, "learning_rate": 0.0001985042945482962, "loss": 1.6226, "step": 2331 }, { "epoch": 0.0835138861532401, "grad_norm": 1.7239434719085693, "learning_rate": 0.00019850229527121956, "loss": 1.5937, "step": 2332 }, { "epoch": 0.0835496982828084, "grad_norm": 1.6355654001235962, "learning_rate": 0.00019850029466892161, "loss": 1.7279, "step": 2333 }, { "epoch": 0.08358551041237668, "grad_norm": 2.545929193496704, "learning_rate": 0.00019849829274142924, "loss": 1.472, "step": 2334 }, { "epoch": 0.08362132254194496, "grad_norm": 2.478335380554199, "learning_rate": 0.00019849628948876943, "loss": 2.0368, "step": 2335 }, { "epoch": 0.08365713467151324, "grad_norm": 1.5632565021514893, "learning_rate": 0.00019849428491096904, "loss": 1.5616, "step": 2336 }, { "epoch": 0.08369294680108152, "grad_norm": 1.9258129596710205, "learning_rate": 0.0001984922790080551, "loss": 1.7881, "step": 2337 }, { "epoch": 0.08372875893064981, "grad_norm": 1.5572803020477295, "learning_rate": 0.0001984902717800546, "loss": 1.4918, "step": 2338 }, { "epoch": 0.08376457106021809, "grad_norm": 1.6713584661483765, "learning_rate": 0.00019848826322699456, "loss": 1.5777, "step": 2339 }, { "epoch": 0.08380038318978639, "grad_norm": 1.7208898067474365, "learning_rate": 0.000198486253348902, "loss": 1.4879, "step": 2340 }, { "epoch": 0.08383619531935467, "grad_norm": 3.210822343826294, "learning_rate": 0.0001984842421458039, "loss": 1.8145, "step": 2341 }, { "epoch": 0.08387200744892295, "grad_norm": 1.435042142868042, "learning_rate": 0.00019848222961772733, "loss": 1.7716, "step": 2342 }, { "epoch": 0.08390781957849124, "grad_norm": 1.7752784490585327, "learning_rate": 0.00019848021576469944, "loss": 1.6036, "step": 2343 }, { "epoch": 0.08394363170805952, "grad_norm": 1.3135857582092285, "learning_rate": 0.00019847820058674728, "loss": 1.6017, "step": 2344 }, { "epoch": 0.0839794438376278, "grad_norm": 2.173564910888672, "learning_rate": 0.00019847618408389792, "loss": 1.7796, "step": 2345 }, { "epoch": 0.08401525596719608, "grad_norm": 2.061150312423706, "learning_rate": 0.00019847416625617855, "loss": 1.7466, "step": 2346 }, { "epoch": 0.08405106809676438, "grad_norm": 1.2300703525543213, "learning_rate": 0.0001984721471036163, "loss": 1.6624, "step": 2347 }, { "epoch": 0.08408688022633266, "grad_norm": 2.010895252227783, "learning_rate": 0.00019847012662623832, "loss": 1.5831, "step": 2348 }, { "epoch": 0.08412269235590095, "grad_norm": 1.3102781772613525, "learning_rate": 0.00019846810482407182, "loss": 1.7545, "step": 2349 }, { "epoch": 0.08415850448546923, "grad_norm": 1.5911515951156616, "learning_rate": 0.00019846608169714398, "loss": 1.3566, "step": 2350 }, { "epoch": 0.08419431661503751, "grad_norm": 2.5576908588409424, "learning_rate": 0.00019846405724548204, "loss": 1.5922, "step": 2351 }, { "epoch": 0.0842301287446058, "grad_norm": 1.8098796606063843, "learning_rate": 0.00019846203146911318, "loss": 1.8392, "step": 2352 }, { "epoch": 0.08426594087417408, "grad_norm": 1.828391432762146, "learning_rate": 0.00019846000436806471, "loss": 1.9675, "step": 2353 }, { "epoch": 0.08430175300374236, "grad_norm": 1.516196846961975, "learning_rate": 0.00019845797594236387, "loss": 1.596, "step": 2354 }, { "epoch": 0.08433756513331066, "grad_norm": 1.5106902122497559, "learning_rate": 0.00019845594619203797, "loss": 1.6667, "step": 2355 }, { "epoch": 0.08437337726287894, "grad_norm": 1.6493037939071655, "learning_rate": 0.00019845391511711435, "loss": 1.5757, "step": 2356 }, { "epoch": 0.08440918939244722, "grad_norm": 1.7102857828140259, "learning_rate": 0.00019845188271762029, "loss": 1.6952, "step": 2357 }, { "epoch": 0.0844450015220155, "grad_norm": 1.6236246824264526, "learning_rate": 0.0001984498489935831, "loss": 1.542, "step": 2358 }, { "epoch": 0.08448081365158379, "grad_norm": 1.3867934942245483, "learning_rate": 0.00019844781394503022, "loss": 1.5048, "step": 2359 }, { "epoch": 0.08451662578115207, "grad_norm": 2.9754772186279297, "learning_rate": 0.00019844577757198898, "loss": 1.6699, "step": 2360 }, { "epoch": 0.08455243791072035, "grad_norm": 1.5601086616516113, "learning_rate": 0.00019844373987448676, "loss": 1.5336, "step": 2361 }, { "epoch": 0.08458825004028865, "grad_norm": 1.989410638809204, "learning_rate": 0.00019844170085255104, "loss": 1.5408, "step": 2362 }, { "epoch": 0.08462406216985693, "grad_norm": 1.0998789072036743, "learning_rate": 0.0001984396605062092, "loss": 1.5359, "step": 2363 }, { "epoch": 0.08465987429942522, "grad_norm": 2.2412939071655273, "learning_rate": 0.00019843761883548872, "loss": 1.448, "step": 2364 }, { "epoch": 0.0846956864289935, "grad_norm": 1.2157700061798096, "learning_rate": 0.00019843557584041705, "loss": 1.4621, "step": 2365 }, { "epoch": 0.08473149855856178, "grad_norm": 1.5647035837173462, "learning_rate": 0.0001984335315210217, "loss": 1.8045, "step": 2366 }, { "epoch": 0.08476731068813007, "grad_norm": 1.3524651527404785, "learning_rate": 0.00019843148587733012, "loss": 1.778, "step": 2367 }, { "epoch": 0.08480312281769835, "grad_norm": 1.6187690496444702, "learning_rate": 0.00019842943890936986, "loss": 1.6222, "step": 2368 }, { "epoch": 0.08483893494726664, "grad_norm": 3.2864222526550293, "learning_rate": 0.00019842739061716848, "loss": 1.8601, "step": 2369 }, { "epoch": 0.08487474707683493, "grad_norm": 1.786603569984436, "learning_rate": 0.00019842534100075355, "loss": 1.9872, "step": 2370 }, { "epoch": 0.08491055920640321, "grad_norm": 1.6398714780807495, "learning_rate": 0.00019842329006015255, "loss": 1.5213, "step": 2371 }, { "epoch": 0.0849463713359715, "grad_norm": 1.576285719871521, "learning_rate": 0.0001984212377953932, "loss": 1.9445, "step": 2372 }, { "epoch": 0.08498218346553978, "grad_norm": 1.1794153451919556, "learning_rate": 0.00019841918420650302, "loss": 1.6204, "step": 2373 }, { "epoch": 0.08501799559510806, "grad_norm": 1.7451518774032593, "learning_rate": 0.00019841712929350965, "loss": 1.7095, "step": 2374 }, { "epoch": 0.08505380772467634, "grad_norm": 1.3258010149002075, "learning_rate": 0.0001984150730564408, "loss": 1.8024, "step": 2375 }, { "epoch": 0.08508961985424464, "grad_norm": 1.6353540420532227, "learning_rate": 0.00019841301549532409, "loss": 1.7716, "step": 2376 }, { "epoch": 0.08512543198381292, "grad_norm": 1.306315302848816, "learning_rate": 0.00019841095661018716, "loss": 1.6493, "step": 2377 }, { "epoch": 0.0851612441133812, "grad_norm": 1.7025679349899292, "learning_rate": 0.00019840889640105775, "loss": 1.8987, "step": 2378 }, { "epoch": 0.08519705624294949, "grad_norm": 2.471372127532959, "learning_rate": 0.00019840683486796362, "loss": 1.5562, "step": 2379 }, { "epoch": 0.08523286837251777, "grad_norm": 1.2974969148635864, "learning_rate": 0.00019840477201093243, "loss": 1.6721, "step": 2380 }, { "epoch": 0.08526868050208605, "grad_norm": 1.7710561752319336, "learning_rate": 0.00019840270782999197, "loss": 1.6326, "step": 2381 }, { "epoch": 0.08530449263165434, "grad_norm": 1.3455687761306763, "learning_rate": 0.00019840064232517, "loss": 1.7118, "step": 2382 }, { "epoch": 0.08534030476122263, "grad_norm": 1.3794631958007812, "learning_rate": 0.0001983985754964943, "loss": 1.5416, "step": 2383 }, { "epoch": 0.08537611689079092, "grad_norm": 1.446144938468933, "learning_rate": 0.00019839650734399276, "loss": 1.7194, "step": 2384 }, { "epoch": 0.0854119290203592, "grad_norm": 1.3965164422988892, "learning_rate": 0.0001983944378676931, "loss": 1.5514, "step": 2385 }, { "epoch": 0.08544774114992748, "grad_norm": 1.8332926034927368, "learning_rate": 0.00019839236706762318, "loss": 1.9941, "step": 2386 }, { "epoch": 0.08548355327949576, "grad_norm": 2.237839460372925, "learning_rate": 0.00019839029494381086, "loss": 1.5284, "step": 2387 }, { "epoch": 0.08551936540906405, "grad_norm": 1.652097463607788, "learning_rate": 0.0001983882214962841, "loss": 1.8933, "step": 2388 }, { "epoch": 0.08555517753863233, "grad_norm": 1.1951125860214233, "learning_rate": 0.00019838614672507067, "loss": 1.5353, "step": 2389 }, { "epoch": 0.08559098966820063, "grad_norm": 1.575891375541687, "learning_rate": 0.00019838407063019857, "loss": 1.9357, "step": 2390 }, { "epoch": 0.08562680179776891, "grad_norm": 1.398511290550232, "learning_rate": 0.0001983819932116957, "loss": 1.6936, "step": 2391 }, { "epoch": 0.08566261392733719, "grad_norm": 1.8677663803100586, "learning_rate": 0.00019837991446959005, "loss": 1.8502, "step": 2392 }, { "epoch": 0.08569842605690547, "grad_norm": 1.2182732820510864, "learning_rate": 0.0001983778344039095, "loss": 1.4945, "step": 2393 }, { "epoch": 0.08573423818647376, "grad_norm": 1.4245761632919312, "learning_rate": 0.00019837575301468211, "loss": 1.8349, "step": 2394 }, { "epoch": 0.08577005031604204, "grad_norm": 1.4133869409561157, "learning_rate": 0.00019837367030193587, "loss": 1.7366, "step": 2395 }, { "epoch": 0.08580586244561032, "grad_norm": 2.530789852142334, "learning_rate": 0.00019837158626569878, "loss": 2.013, "step": 2396 }, { "epoch": 0.08584167457517862, "grad_norm": 1.37733793258667, "learning_rate": 0.0001983695009059989, "loss": 1.7408, "step": 2397 }, { "epoch": 0.0858774867047469, "grad_norm": 1.6104776859283447, "learning_rate": 0.00019836741422286425, "loss": 1.5966, "step": 2398 }, { "epoch": 0.08591329883431519, "grad_norm": 1.435455322265625, "learning_rate": 0.00019836532621632293, "loss": 1.5554, "step": 2399 }, { "epoch": 0.08594911096388347, "grad_norm": 1.2741247415542603, "learning_rate": 0.000198363236886403, "loss": 1.3838, "step": 2400 }, { "epoch": 0.08598492309345175, "grad_norm": 1.459208369255066, "learning_rate": 0.00019836114623313265, "loss": 1.6547, "step": 2401 }, { "epoch": 0.08602073522302003, "grad_norm": 2.1880502700805664, "learning_rate": 0.00019835905425653994, "loss": 1.3995, "step": 2402 }, { "epoch": 0.08605654735258832, "grad_norm": 2.445836305618286, "learning_rate": 0.00019835696095665302, "loss": 1.9346, "step": 2403 }, { "epoch": 0.0860923594821566, "grad_norm": 2.707475185394287, "learning_rate": 0.00019835486633350006, "loss": 1.8816, "step": 2404 }, { "epoch": 0.0861281716117249, "grad_norm": 1.7417428493499756, "learning_rate": 0.00019835277038710928, "loss": 1.5025, "step": 2405 }, { "epoch": 0.08616398374129318, "grad_norm": 1.2903729677200317, "learning_rate": 0.00019835067311750878, "loss": 1.5742, "step": 2406 }, { "epoch": 0.08619979587086146, "grad_norm": 1.8366937637329102, "learning_rate": 0.00019834857452472686, "loss": 1.5567, "step": 2407 }, { "epoch": 0.08623560800042974, "grad_norm": 1.5683704614639282, "learning_rate": 0.00019834647460879174, "loss": 1.3516, "step": 2408 }, { "epoch": 0.08627142012999803, "grad_norm": 1.3883376121520996, "learning_rate": 0.00019834437336973165, "loss": 1.6229, "step": 2409 }, { "epoch": 0.08630723225956631, "grad_norm": 2.0175423622131348, "learning_rate": 0.00019834227080757488, "loss": 1.6116, "step": 2410 }, { "epoch": 0.0863430443891346, "grad_norm": 1.6487847566604614, "learning_rate": 0.0001983401669223497, "loss": 1.674, "step": 2411 }, { "epoch": 0.08637885651870289, "grad_norm": 1.7350643873214722, "learning_rate": 0.00019833806171408442, "loss": 1.7811, "step": 2412 }, { "epoch": 0.08641466864827117, "grad_norm": 1.6598446369171143, "learning_rate": 0.0001983359551828074, "loss": 1.6918, "step": 2413 }, { "epoch": 0.08645048077783946, "grad_norm": 5.592057228088379, "learning_rate": 0.0001983338473285469, "loss": 1.6704, "step": 2414 }, { "epoch": 0.08648629290740774, "grad_norm": 1.5842877626419067, "learning_rate": 0.00019833173815133134, "loss": 1.4705, "step": 2415 }, { "epoch": 0.08652210503697602, "grad_norm": 1.3319464921951294, "learning_rate": 0.0001983296276511891, "loss": 1.6647, "step": 2416 }, { "epoch": 0.0865579171665443, "grad_norm": 1.8691999912261963, "learning_rate": 0.00019832751582814855, "loss": 1.83, "step": 2417 }, { "epoch": 0.08659372929611259, "grad_norm": 2.428990125656128, "learning_rate": 0.0001983254026822381, "loss": 1.6859, "step": 2418 }, { "epoch": 0.08662954142568088, "grad_norm": 2.839390516281128, "learning_rate": 0.0001983232882134862, "loss": 1.4562, "step": 2419 }, { "epoch": 0.08666535355524917, "grad_norm": 1.6174662113189697, "learning_rate": 0.00019832117242192128, "loss": 1.4465, "step": 2420 }, { "epoch": 0.08670116568481745, "grad_norm": 1.4822794198989868, "learning_rate": 0.0001983190553075718, "loss": 1.4847, "step": 2421 }, { "epoch": 0.08673697781438573, "grad_norm": 1.9179503917694092, "learning_rate": 0.00019831693687046627, "loss": 1.8159, "step": 2422 }, { "epoch": 0.08677278994395402, "grad_norm": 3.1191704273223877, "learning_rate": 0.00019831481711063314, "loss": 1.8706, "step": 2423 }, { "epoch": 0.0868086020735223, "grad_norm": 1.2060275077819824, "learning_rate": 0.000198312696028101, "loss": 1.5291, "step": 2424 }, { "epoch": 0.08684441420309058, "grad_norm": 1.2168149948120117, "learning_rate": 0.00019831057362289833, "loss": 1.5218, "step": 2425 }, { "epoch": 0.08688022633265888, "grad_norm": 2.720611095428467, "learning_rate": 0.00019830844989505373, "loss": 1.8275, "step": 2426 }, { "epoch": 0.08691603846222716, "grad_norm": 1.599690318107605, "learning_rate": 0.00019830632484459573, "loss": 1.4722, "step": 2427 }, { "epoch": 0.08695185059179544, "grad_norm": 1.5266869068145752, "learning_rate": 0.00019830419847155292, "loss": 1.5618, "step": 2428 }, { "epoch": 0.08698766272136373, "grad_norm": 2.5052943229675293, "learning_rate": 0.00019830207077595392, "loss": 1.9813, "step": 2429 }, { "epoch": 0.08702347485093201, "grad_norm": 2.039900779724121, "learning_rate": 0.00019829994175782738, "loss": 1.7817, "step": 2430 }, { "epoch": 0.08705928698050029, "grad_norm": 1.7723355293273926, "learning_rate": 0.0001982978114172019, "loss": 1.8192, "step": 2431 }, { "epoch": 0.08709509911006857, "grad_norm": 1.4781324863433838, "learning_rate": 0.0001982956797541062, "loss": 1.5802, "step": 2432 }, { "epoch": 0.08713091123963687, "grad_norm": 1.499817967414856, "learning_rate": 0.0001982935467685689, "loss": 1.7145, "step": 2433 }, { "epoch": 0.08716672336920515, "grad_norm": 1.2955687046051025, "learning_rate": 0.0001982914124606187, "loss": 1.4076, "step": 2434 }, { "epoch": 0.08720253549877344, "grad_norm": 1.1812690496444702, "learning_rate": 0.00019828927683028435, "loss": 1.6311, "step": 2435 }, { "epoch": 0.08723834762834172, "grad_norm": 1.2968649864196777, "learning_rate": 0.00019828713987759454, "loss": 1.377, "step": 2436 }, { "epoch": 0.08727415975791, "grad_norm": 1.2757093906402588, "learning_rate": 0.00019828500160257807, "loss": 1.7077, "step": 2437 }, { "epoch": 0.08730997188747829, "grad_norm": 1.4437847137451172, "learning_rate": 0.0001982828620052637, "loss": 1.4253, "step": 2438 }, { "epoch": 0.08734578401704657, "grad_norm": 1.79239821434021, "learning_rate": 0.00019828072108568016, "loss": 1.8537, "step": 2439 }, { "epoch": 0.08738159614661486, "grad_norm": 1.204079031944275, "learning_rate": 0.0001982785788438563, "loss": 1.5567, "step": 2440 }, { "epoch": 0.08741740827618315, "grad_norm": 2.0811383724212646, "learning_rate": 0.00019827643527982095, "loss": 1.806, "step": 2441 }, { "epoch": 0.08745322040575143, "grad_norm": 1.5582301616668701, "learning_rate": 0.00019827429039360293, "loss": 1.7261, "step": 2442 }, { "epoch": 0.08748903253531971, "grad_norm": 1.1033960580825806, "learning_rate": 0.00019827214418523107, "loss": 1.7938, "step": 2443 }, { "epoch": 0.087524844664888, "grad_norm": 1.3593032360076904, "learning_rate": 0.0001982699966547343, "loss": 1.5597, "step": 2444 }, { "epoch": 0.08756065679445628, "grad_norm": 1.574497938156128, "learning_rate": 0.00019826784780214147, "loss": 1.3391, "step": 2445 }, { "epoch": 0.08759646892402456, "grad_norm": 2.002777338027954, "learning_rate": 0.0001982656976274815, "loss": 1.3655, "step": 2446 }, { "epoch": 0.08763228105359286, "grad_norm": 2.2492942810058594, "learning_rate": 0.00019826354613078332, "loss": 1.948, "step": 2447 }, { "epoch": 0.08766809318316114, "grad_norm": 1.3055859804153442, "learning_rate": 0.0001982613933120759, "loss": 1.6317, "step": 2448 }, { "epoch": 0.08770390531272942, "grad_norm": 2.0596539974212646, "learning_rate": 0.00019825923917138818, "loss": 1.8837, "step": 2449 }, { "epoch": 0.08773971744229771, "grad_norm": 1.592313289642334, "learning_rate": 0.0001982570837087491, "loss": 1.5169, "step": 2450 }, { "epoch": 0.08777552957186599, "grad_norm": 1.5785194635391235, "learning_rate": 0.00019825492692418774, "loss": 1.5085, "step": 2451 }, { "epoch": 0.08781134170143427, "grad_norm": 1.0770231485366821, "learning_rate": 0.00019825276881773308, "loss": 1.5045, "step": 2452 }, { "epoch": 0.08784715383100256, "grad_norm": 1.7435587644577026, "learning_rate": 0.00019825060938941414, "loss": 1.7879, "step": 2453 }, { "epoch": 0.08788296596057084, "grad_norm": 1.482975959777832, "learning_rate": 0.00019824844863925998, "loss": 1.722, "step": 2454 }, { "epoch": 0.08791877809013914, "grad_norm": 1.9586156606674194, "learning_rate": 0.0001982462865672997, "loss": 1.6066, "step": 2455 }, { "epoch": 0.08795459021970742, "grad_norm": 1.9015264511108398, "learning_rate": 0.00019824412317356234, "loss": 1.4479, "step": 2456 }, { "epoch": 0.0879904023492757, "grad_norm": 1.943438172340393, "learning_rate": 0.00019824195845807703, "loss": 1.5558, "step": 2457 }, { "epoch": 0.08802621447884398, "grad_norm": 1.8411654233932495, "learning_rate": 0.00019823979242087288, "loss": 1.3188, "step": 2458 }, { "epoch": 0.08806202660841227, "grad_norm": 1.542543888092041, "learning_rate": 0.00019823762506197907, "loss": 1.7211, "step": 2459 }, { "epoch": 0.08809783873798055, "grad_norm": 1.1874290704727173, "learning_rate": 0.0001982354563814247, "loss": 1.4066, "step": 2460 }, { "epoch": 0.08813365086754883, "grad_norm": 2.709726095199585, "learning_rate": 0.000198233286379239, "loss": 1.8428, "step": 2461 }, { "epoch": 0.08816946299711713, "grad_norm": 1.5549380779266357, "learning_rate": 0.00019823111505545114, "loss": 1.537, "step": 2462 }, { "epoch": 0.08820527512668541, "grad_norm": 2.0994925498962402, "learning_rate": 0.00019822894241009037, "loss": 1.4763, "step": 2463 }, { "epoch": 0.0882410872562537, "grad_norm": 1.4595978260040283, "learning_rate": 0.00019822676844318582, "loss": 1.6066, "step": 2464 }, { "epoch": 0.08827689938582198, "grad_norm": 1.3861949443817139, "learning_rate": 0.00019822459315476686, "loss": 1.5408, "step": 2465 }, { "epoch": 0.08831271151539026, "grad_norm": 1.820631980895996, "learning_rate": 0.00019822241654486266, "loss": 1.9246, "step": 2466 }, { "epoch": 0.08834852364495854, "grad_norm": 2.3096110820770264, "learning_rate": 0.00019822023861350256, "loss": 1.8691, "step": 2467 }, { "epoch": 0.08838433577452683, "grad_norm": 1.4352091550827026, "learning_rate": 0.00019821805936071584, "loss": 1.6238, "step": 2468 }, { "epoch": 0.08842014790409512, "grad_norm": 1.4374312162399292, "learning_rate": 0.00019821587878653184, "loss": 1.6068, "step": 2469 }, { "epoch": 0.0884559600336634, "grad_norm": 1.8493610620498657, "learning_rate": 0.00019821369689097988, "loss": 1.6721, "step": 2470 }, { "epoch": 0.08849177216323169, "grad_norm": 1.8211166858673096, "learning_rate": 0.00019821151367408927, "loss": 1.3961, "step": 2471 }, { "epoch": 0.08852758429279997, "grad_norm": 2.59659743309021, "learning_rate": 0.00019820932913588947, "loss": 1.8369, "step": 2472 }, { "epoch": 0.08856339642236825, "grad_norm": 1.3814586400985718, "learning_rate": 0.00019820714327640983, "loss": 1.2698, "step": 2473 }, { "epoch": 0.08859920855193654, "grad_norm": 1.4654902219772339, "learning_rate": 0.00019820495609567976, "loss": 1.6292, "step": 2474 }, { "epoch": 0.08863502068150482, "grad_norm": 1.361952543258667, "learning_rate": 0.00019820276759372867, "loss": 1.9109, "step": 2475 }, { "epoch": 0.08867083281107312, "grad_norm": 1.8597922325134277, "learning_rate": 0.00019820057777058598, "loss": 1.5002, "step": 2476 }, { "epoch": 0.0887066449406414, "grad_norm": 1.6137717962265015, "learning_rate": 0.00019819838662628122, "loss": 1.6735, "step": 2477 }, { "epoch": 0.08874245707020968, "grad_norm": 1.753365397453308, "learning_rate": 0.00019819619416084385, "loss": 1.8299, "step": 2478 }, { "epoch": 0.08877826919977796, "grad_norm": 1.4413976669311523, "learning_rate": 0.00019819400037430332, "loss": 1.5304, "step": 2479 }, { "epoch": 0.08881408132934625, "grad_norm": 1.604013442993164, "learning_rate": 0.0001981918052666892, "loss": 1.5374, "step": 2480 }, { "epoch": 0.08884989345891453, "grad_norm": 2.0614285469055176, "learning_rate": 0.00019818960883803097, "loss": 1.5122, "step": 2481 }, { "epoch": 0.08888570558848281, "grad_norm": 1.5279723405838013, "learning_rate": 0.00019818741108835824, "loss": 1.3099, "step": 2482 }, { "epoch": 0.08892151771805111, "grad_norm": 2.621316432952881, "learning_rate": 0.00019818521201770052, "loss": 1.8033, "step": 2483 }, { "epoch": 0.08895732984761939, "grad_norm": 1.9032632112503052, "learning_rate": 0.00019818301162608743, "loss": 1.7269, "step": 2484 }, { "epoch": 0.08899314197718768, "grad_norm": 1.6183884143829346, "learning_rate": 0.00019818080991354858, "loss": 1.4296, "step": 2485 }, { "epoch": 0.08902895410675596, "grad_norm": 1.493016004562378, "learning_rate": 0.00019817860688011357, "loss": 1.558, "step": 2486 }, { "epoch": 0.08906476623632424, "grad_norm": 1.3361989259719849, "learning_rate": 0.00019817640252581202, "loss": 1.3134, "step": 2487 }, { "epoch": 0.08910057836589252, "grad_norm": 2.0151519775390625, "learning_rate": 0.00019817419685067364, "loss": 2.0462, "step": 2488 }, { "epoch": 0.08913639049546081, "grad_norm": 1.6625710725784302, "learning_rate": 0.00019817198985472807, "loss": 1.5406, "step": 2489 }, { "epoch": 0.0891722026250291, "grad_norm": 1.9225102663040161, "learning_rate": 0.00019816978153800504, "loss": 1.7719, "step": 2490 }, { "epoch": 0.08920801475459739, "grad_norm": 1.7934019565582275, "learning_rate": 0.00019816757190053416, "loss": 1.6073, "step": 2491 }, { "epoch": 0.08924382688416567, "grad_norm": 1.8220930099487305, "learning_rate": 0.00019816536094234528, "loss": 1.7796, "step": 2492 }, { "epoch": 0.08927963901373395, "grad_norm": 1.3911093473434448, "learning_rate": 0.00019816314866346807, "loss": 1.4431, "step": 2493 }, { "epoch": 0.08931545114330224, "grad_norm": 1.3377423286437988, "learning_rate": 0.00019816093506393233, "loss": 1.6441, "step": 2494 }, { "epoch": 0.08935126327287052, "grad_norm": 2.121514320373535, "learning_rate": 0.00019815872014376784, "loss": 1.6375, "step": 2495 }, { "epoch": 0.0893870754024388, "grad_norm": 2.1512534618377686, "learning_rate": 0.00019815650390300434, "loss": 1.8018, "step": 2496 }, { "epoch": 0.0894228875320071, "grad_norm": 1.6939420700073242, "learning_rate": 0.00019815428634167176, "loss": 1.2262, "step": 2497 }, { "epoch": 0.08945869966157538, "grad_norm": 1.2576205730438232, "learning_rate": 0.00019815206745979981, "loss": 1.6021, "step": 2498 }, { "epoch": 0.08949451179114366, "grad_norm": 1.6956746578216553, "learning_rate": 0.00019814984725741842, "loss": 1.5378, "step": 2499 }, { "epoch": 0.08953032392071195, "grad_norm": 1.830729365348816, "learning_rate": 0.00019814762573455743, "loss": 1.9099, "step": 2500 }, { "epoch": 0.08956613605028023, "grad_norm": 1.476012110710144, "learning_rate": 0.00019814540289124675, "loss": 1.3992, "step": 2501 }, { "epoch": 0.08960194817984851, "grad_norm": 2.3555774688720703, "learning_rate": 0.00019814317872751626, "loss": 1.9159, "step": 2502 }, { "epoch": 0.0896377603094168, "grad_norm": 1.2655014991760254, "learning_rate": 0.0001981409532433959, "loss": 1.7051, "step": 2503 }, { "epoch": 0.08967357243898508, "grad_norm": 1.7695574760437012, "learning_rate": 0.00019813872643891563, "loss": 1.7181, "step": 2504 }, { "epoch": 0.08970938456855337, "grad_norm": 1.8145886659622192, "learning_rate": 0.00019813649831410535, "loss": 1.5635, "step": 2505 }, { "epoch": 0.08974519669812166, "grad_norm": 1.3770238161087036, "learning_rate": 0.00019813426886899509, "loss": 1.2597, "step": 2506 }, { "epoch": 0.08978100882768994, "grad_norm": 2.0309836864471436, "learning_rate": 0.00019813203810361483, "loss": 1.4215, "step": 2507 }, { "epoch": 0.08981682095725822, "grad_norm": 1.656128168106079, "learning_rate": 0.00019812980601799458, "loss": 1.5579, "step": 2508 }, { "epoch": 0.0898526330868265, "grad_norm": 1.2524288892745972, "learning_rate": 0.00019812757261216435, "loss": 1.7402, "step": 2509 }, { "epoch": 0.08988844521639479, "grad_norm": 1.3505594730377197, "learning_rate": 0.0001981253378861542, "loss": 1.6665, "step": 2510 }, { "epoch": 0.08992425734596307, "grad_norm": 2.9887189865112305, "learning_rate": 0.00019812310183999423, "loss": 1.9483, "step": 2511 }, { "epoch": 0.08996006947553137, "grad_norm": 1.1449756622314453, "learning_rate": 0.00019812086447371446, "loss": 1.6492, "step": 2512 }, { "epoch": 0.08999588160509965, "grad_norm": 1.5508500337600708, "learning_rate": 0.00019811862578734507, "loss": 1.9411, "step": 2513 }, { "epoch": 0.09003169373466793, "grad_norm": 1.6970720291137695, "learning_rate": 0.0001981163857809161, "loss": 1.7011, "step": 2514 }, { "epoch": 0.09006750586423622, "grad_norm": 1.9046391248703003, "learning_rate": 0.00019811414445445772, "loss": 1.4795, "step": 2515 }, { "epoch": 0.0901033179938045, "grad_norm": 1.9710471630096436, "learning_rate": 0.00019811190180800013, "loss": 1.8442, "step": 2516 }, { "epoch": 0.09013913012337278, "grad_norm": 1.5773948431015015, "learning_rate": 0.0001981096578415734, "loss": 1.569, "step": 2517 }, { "epoch": 0.09017494225294106, "grad_norm": 1.4161051511764526, "learning_rate": 0.00019810741255520782, "loss": 1.4553, "step": 2518 }, { "epoch": 0.09021075438250936, "grad_norm": 1.7593234777450562, "learning_rate": 0.0001981051659489335, "loss": 1.7854, "step": 2519 }, { "epoch": 0.09024656651207764, "grad_norm": 1.5166726112365723, "learning_rate": 0.00019810291802278078, "loss": 1.403, "step": 2520 }, { "epoch": 0.09028237864164593, "grad_norm": 1.500707983970642, "learning_rate": 0.00019810066877677982, "loss": 2.0719, "step": 2521 }, { "epoch": 0.09031819077121421, "grad_norm": 2.0746140480041504, "learning_rate": 0.00019809841821096086, "loss": 1.3798, "step": 2522 }, { "epoch": 0.09035400290078249, "grad_norm": 1.631017804145813, "learning_rate": 0.00019809616632535427, "loss": 1.6161, "step": 2523 }, { "epoch": 0.09038981503035078, "grad_norm": 1.3964747190475464, "learning_rate": 0.00019809391311999028, "loss": 1.7918, "step": 2524 }, { "epoch": 0.09042562715991906, "grad_norm": 1.8602650165557861, "learning_rate": 0.00019809165859489922, "loss": 1.7201, "step": 2525 }, { "epoch": 0.09046143928948736, "grad_norm": 1.7261444330215454, "learning_rate": 0.00019808940275011145, "loss": 1.7615, "step": 2526 }, { "epoch": 0.09049725141905564, "grad_norm": 1.6224415302276611, "learning_rate": 0.00019808714558565727, "loss": 1.1881, "step": 2527 }, { "epoch": 0.09053306354862392, "grad_norm": 1.8537174463272095, "learning_rate": 0.00019808488710156707, "loss": 1.7823, "step": 2528 }, { "epoch": 0.0905688756781922, "grad_norm": 2.799915313720703, "learning_rate": 0.0001980826272978712, "loss": 1.5794, "step": 2529 }, { "epoch": 0.09060468780776049, "grad_norm": 1.8718516826629639, "learning_rate": 0.00019808036617460016, "loss": 1.6623, "step": 2530 }, { "epoch": 0.09064049993732877, "grad_norm": 1.234557867050171, "learning_rate": 0.00019807810373178425, "loss": 1.5993, "step": 2531 }, { "epoch": 0.09067631206689705, "grad_norm": 1.8687853813171387, "learning_rate": 0.000198075839969454, "loss": 1.6352, "step": 2532 }, { "epoch": 0.09071212419646535, "grad_norm": 1.8785006999969482, "learning_rate": 0.00019807357488763985, "loss": 1.7187, "step": 2533 }, { "epoch": 0.09074793632603363, "grad_norm": 1.3195416927337646, "learning_rate": 0.00019807130848637224, "loss": 1.7064, "step": 2534 }, { "epoch": 0.09078374845560191, "grad_norm": 2.186288833618164, "learning_rate": 0.00019806904076568165, "loss": 1.5541, "step": 2535 }, { "epoch": 0.0908195605851702, "grad_norm": 1.6952531337738037, "learning_rate": 0.00019806677172559865, "loss": 1.3094, "step": 2536 }, { "epoch": 0.09085537271473848, "grad_norm": 4.69820499420166, "learning_rate": 0.00019806450136615372, "loss": 1.5628, "step": 2537 }, { "epoch": 0.09089118484430676, "grad_norm": 1.3726061582565308, "learning_rate": 0.0001980622296873774, "loss": 1.7239, "step": 2538 }, { "epoch": 0.09092699697387505, "grad_norm": 1.5278609991073608, "learning_rate": 0.0001980599566893003, "loss": 2.0344, "step": 2539 }, { "epoch": 0.09096280910344334, "grad_norm": 1.4546926021575928, "learning_rate": 0.00019805768237195296, "loss": 1.671, "step": 2540 }, { "epoch": 0.09099862123301163, "grad_norm": 2.053382396697998, "learning_rate": 0.00019805540673536597, "loss": 1.7412, "step": 2541 }, { "epoch": 0.09103443336257991, "grad_norm": 2.1475985050201416, "learning_rate": 0.00019805312977956997, "loss": 1.3434, "step": 2542 }, { "epoch": 0.09107024549214819, "grad_norm": 2.8895585536956787, "learning_rate": 0.0001980508515045956, "loss": 1.545, "step": 2543 }, { "epoch": 0.09110605762171647, "grad_norm": 1.509041428565979, "learning_rate": 0.00019804857191047353, "loss": 1.936, "step": 2544 }, { "epoch": 0.09114186975128476, "grad_norm": 2.280407190322876, "learning_rate": 0.00019804629099723435, "loss": 1.9202, "step": 2545 }, { "epoch": 0.09117768188085304, "grad_norm": 2.5649526119232178, "learning_rate": 0.00019804400876490883, "loss": 1.7697, "step": 2546 }, { "epoch": 0.09121349401042134, "grad_norm": 2.6087114810943604, "learning_rate": 0.00019804172521352761, "loss": 1.6824, "step": 2547 }, { "epoch": 0.09124930613998962, "grad_norm": 1.8538936376571655, "learning_rate": 0.00019803944034312148, "loss": 1.6512, "step": 2548 }, { "epoch": 0.0912851182695579, "grad_norm": 2.424487352371216, "learning_rate": 0.0001980371541537211, "loss": 1.4249, "step": 2549 }, { "epoch": 0.09132093039912619, "grad_norm": 1.9333043098449707, "learning_rate": 0.0001980348666453573, "loss": 1.8535, "step": 2550 }, { "epoch": 0.09135674252869447, "grad_norm": 1.7312296628952026, "learning_rate": 0.00019803257781806082, "loss": 1.7266, "step": 2551 }, { "epoch": 0.09139255465826275, "grad_norm": 1.5011173486709595, "learning_rate": 0.00019803028767186246, "loss": 1.7465, "step": 2552 }, { "epoch": 0.09142836678783103, "grad_norm": 2.4707727432250977, "learning_rate": 0.000198027996206793, "loss": 1.6072, "step": 2553 }, { "epoch": 0.09146417891739932, "grad_norm": 3.124415159225464, "learning_rate": 0.0001980257034228833, "loss": 1.8117, "step": 2554 }, { "epoch": 0.09149999104696761, "grad_norm": 1.7809544801712036, "learning_rate": 0.00019802340932016424, "loss": 1.7285, "step": 2555 }, { "epoch": 0.0915358031765359, "grad_norm": 1.9475129842758179, "learning_rate": 0.00019802111389866664, "loss": 1.6601, "step": 2556 }, { "epoch": 0.09157161530610418, "grad_norm": 1.3887070417404175, "learning_rate": 0.00019801881715842136, "loss": 1.5281, "step": 2557 }, { "epoch": 0.09160742743567246, "grad_norm": 1.341725468635559, "learning_rate": 0.00019801651909945935, "loss": 1.5264, "step": 2558 }, { "epoch": 0.09164323956524074, "grad_norm": 1.8760310411453247, "learning_rate": 0.0001980142197218115, "loss": 1.6517, "step": 2559 }, { "epoch": 0.09167905169480903, "grad_norm": 2.533712863922119, "learning_rate": 0.0001980119190255088, "loss": 1.6518, "step": 2560 }, { "epoch": 0.09171486382437731, "grad_norm": 2.026277780532837, "learning_rate": 0.0001980096170105821, "loss": 1.5654, "step": 2561 }, { "epoch": 0.0917506759539456, "grad_norm": 2.1434755325317383, "learning_rate": 0.00019800731367706248, "loss": 1.7728, "step": 2562 }, { "epoch": 0.09178648808351389, "grad_norm": 1.6237499713897705, "learning_rate": 0.0001980050090249808, "loss": 1.651, "step": 2563 }, { "epoch": 0.09182230021308217, "grad_norm": 1.460422396659851, "learning_rate": 0.0001980027030543682, "loss": 1.5255, "step": 2564 }, { "epoch": 0.09185811234265046, "grad_norm": 1.3004599809646606, "learning_rate": 0.00019800039576525562, "loss": 1.7805, "step": 2565 }, { "epoch": 0.09189392447221874, "grad_norm": 1.7502729892730713, "learning_rate": 0.00019799808715767413, "loss": 1.8473, "step": 2566 }, { "epoch": 0.09192973660178702, "grad_norm": 1.482386827468872, "learning_rate": 0.00019799577723165479, "loss": 1.647, "step": 2567 }, { "epoch": 0.0919655487313553, "grad_norm": 2.2665460109710693, "learning_rate": 0.0001979934659872287, "loss": 1.8273, "step": 2568 }, { "epoch": 0.0920013608609236, "grad_norm": 1.5462538003921509, "learning_rate": 0.00019799115342442687, "loss": 1.6563, "step": 2569 }, { "epoch": 0.09203717299049188, "grad_norm": 1.4237371683120728, "learning_rate": 0.0001979888395432805, "loss": 1.7318, "step": 2570 }, { "epoch": 0.09207298512006017, "grad_norm": 1.7229256629943848, "learning_rate": 0.00019798652434382068, "loss": 1.7655, "step": 2571 }, { "epoch": 0.09210879724962845, "grad_norm": 1.699660062789917, "learning_rate": 0.0001979842078260786, "loss": 1.4854, "step": 2572 }, { "epoch": 0.09214460937919673, "grad_norm": 1.2170562744140625, "learning_rate": 0.00019798188999008536, "loss": 1.2979, "step": 2573 }, { "epoch": 0.09218042150876501, "grad_norm": 1.5595293045043945, "learning_rate": 0.00019797957083587218, "loss": 1.4223, "step": 2574 }, { "epoch": 0.0922162336383333, "grad_norm": 1.6665608882904053, "learning_rate": 0.00019797725036347025, "loss": 1.5744, "step": 2575 }, { "epoch": 0.0922520457679016, "grad_norm": 1.5953999757766724, "learning_rate": 0.00019797492857291085, "loss": 1.4263, "step": 2576 }, { "epoch": 0.09228785789746988, "grad_norm": 1.8051010370254517, "learning_rate": 0.00019797260546422512, "loss": 2.0402, "step": 2577 }, { "epoch": 0.09232367002703816, "grad_norm": 2.281867027282715, "learning_rate": 0.00019797028103744438, "loss": 1.7244, "step": 2578 }, { "epoch": 0.09235948215660644, "grad_norm": 1.6062850952148438, "learning_rate": 0.00019796795529259986, "loss": 1.6111, "step": 2579 }, { "epoch": 0.09239529428617473, "grad_norm": 1.4826256036758423, "learning_rate": 0.0001979656282297229, "loss": 1.442, "step": 2580 }, { "epoch": 0.09243110641574301, "grad_norm": 2.0289320945739746, "learning_rate": 0.00019796329984884473, "loss": 1.986, "step": 2581 }, { "epoch": 0.09246691854531129, "grad_norm": 1.5660099983215332, "learning_rate": 0.00019796097014999678, "loss": 1.7942, "step": 2582 }, { "epoch": 0.09250273067487959, "grad_norm": 1.9263335466384888, "learning_rate": 0.0001979586391332103, "loss": 1.7872, "step": 2583 }, { "epoch": 0.09253854280444787, "grad_norm": 1.0887078046798706, "learning_rate": 0.0001979563067985167, "loss": 1.591, "step": 2584 }, { "epoch": 0.09257435493401615, "grad_norm": 1.3899720907211304, "learning_rate": 0.00019795397314594735, "loss": 1.4923, "step": 2585 }, { "epoch": 0.09261016706358444, "grad_norm": 1.539726972579956, "learning_rate": 0.00019795163817553363, "loss": 1.5147, "step": 2586 }, { "epoch": 0.09264597919315272, "grad_norm": 2.2429847717285156, "learning_rate": 0.000197949301887307, "loss": 1.6219, "step": 2587 }, { "epoch": 0.092681791322721, "grad_norm": 1.6642656326293945, "learning_rate": 0.00019794696428129883, "loss": 1.3772, "step": 2588 }, { "epoch": 0.09271760345228929, "grad_norm": 1.2310926914215088, "learning_rate": 0.0001979446253575406, "loss": 1.5615, "step": 2589 }, { "epoch": 0.09275341558185758, "grad_norm": 2.3761885166168213, "learning_rate": 0.00019794228511606376, "loss": 1.5446, "step": 2590 }, { "epoch": 0.09278922771142586, "grad_norm": 1.7739567756652832, "learning_rate": 0.00019793994355689985, "loss": 1.5243, "step": 2591 }, { "epoch": 0.09282503984099415, "grad_norm": 1.9059218168258667, "learning_rate": 0.0001979376006800803, "loss": 1.5244, "step": 2592 }, { "epoch": 0.09286085197056243, "grad_norm": 2.342510938644409, "learning_rate": 0.00019793525648563668, "loss": 1.6978, "step": 2593 }, { "epoch": 0.09289666410013071, "grad_norm": 1.4292327165603638, "learning_rate": 0.0001979329109736005, "loss": 1.78, "step": 2594 }, { "epoch": 0.092932476229699, "grad_norm": 2.344578504562378, "learning_rate": 0.00019793056414400332, "loss": 1.8671, "step": 2595 }, { "epoch": 0.09296828835926728, "grad_norm": 1.9726289510726929, "learning_rate": 0.00019792821599687676, "loss": 1.5574, "step": 2596 }, { "epoch": 0.09300410048883558, "grad_norm": 1.629486322402954, "learning_rate": 0.00019792586653225237, "loss": 1.5039, "step": 2597 }, { "epoch": 0.09303991261840386, "grad_norm": 1.6008448600769043, "learning_rate": 0.00019792351575016173, "loss": 1.5486, "step": 2598 }, { "epoch": 0.09307572474797214, "grad_norm": 1.9387474060058594, "learning_rate": 0.0001979211636506365, "loss": 1.5127, "step": 2599 }, { "epoch": 0.09311153687754042, "grad_norm": 2.0888354778289795, "learning_rate": 0.0001979188102337083, "loss": 1.6695, "step": 2600 }, { "epoch": 0.0931473490071087, "grad_norm": 1.6669467687606812, "learning_rate": 0.00019791645549940886, "loss": 1.6549, "step": 2601 }, { "epoch": 0.09318316113667699, "grad_norm": 1.3782527446746826, "learning_rate": 0.0001979140994477698, "loss": 1.6938, "step": 2602 }, { "epoch": 0.09321897326624527, "grad_norm": 1.7212045192718506, "learning_rate": 0.00019791174207882284, "loss": 1.3562, "step": 2603 }, { "epoch": 0.09325478539581356, "grad_norm": 1.2293366193771362, "learning_rate": 0.00019790938339259967, "loss": 1.65, "step": 2604 }, { "epoch": 0.09329059752538185, "grad_norm": 1.813463568687439, "learning_rate": 0.00019790702338913204, "loss": 1.5209, "step": 2605 }, { "epoch": 0.09332640965495013, "grad_norm": 1.8608582019805908, "learning_rate": 0.0001979046620684517, "loss": 1.5321, "step": 2606 }, { "epoch": 0.09336222178451842, "grad_norm": 2.023027181625366, "learning_rate": 0.00019790229943059045, "loss": 1.6548, "step": 2607 }, { "epoch": 0.0933980339140867, "grad_norm": 1.4660706520080566, "learning_rate": 0.00019789993547558, "loss": 1.7817, "step": 2608 }, { "epoch": 0.09343384604365498, "grad_norm": 1.455670714378357, "learning_rate": 0.00019789757020345224, "loss": 1.6645, "step": 2609 }, { "epoch": 0.09346965817322327, "grad_norm": 1.8113867044448853, "learning_rate": 0.00019789520361423893, "loss": 1.7678, "step": 2610 }, { "epoch": 0.09350547030279155, "grad_norm": 1.1966438293457031, "learning_rate": 0.00019789283570797192, "loss": 1.7918, "step": 2611 }, { "epoch": 0.09354128243235985, "grad_norm": 1.6652956008911133, "learning_rate": 0.0001978904664846831, "loss": 1.5339, "step": 2612 }, { "epoch": 0.09357709456192813, "grad_norm": 1.756519079208374, "learning_rate": 0.00019788809594440432, "loss": 1.5227, "step": 2613 }, { "epoch": 0.09361290669149641, "grad_norm": 1.711736798286438, "learning_rate": 0.00019788572408716747, "loss": 1.8176, "step": 2614 }, { "epoch": 0.0936487188210647, "grad_norm": 2.1952521800994873, "learning_rate": 0.00019788335091300448, "loss": 1.7203, "step": 2615 }, { "epoch": 0.09368453095063298, "grad_norm": 1.3425790071487427, "learning_rate": 0.00019788097642194725, "loss": 1.7342, "step": 2616 }, { "epoch": 0.09372034308020126, "grad_norm": 1.7646204233169556, "learning_rate": 0.00019787860061402774, "loss": 1.5705, "step": 2617 }, { "epoch": 0.09375615520976954, "grad_norm": 2.6315975189208984, "learning_rate": 0.00019787622348927793, "loss": 1.5904, "step": 2618 }, { "epoch": 0.09379196733933784, "grad_norm": 2.2786176204681396, "learning_rate": 0.00019787384504772976, "loss": 1.4719, "step": 2619 }, { "epoch": 0.09382777946890612, "grad_norm": 3.4464972019195557, "learning_rate": 0.00019787146528941528, "loss": 2.1013, "step": 2620 }, { "epoch": 0.0938635915984744, "grad_norm": 1.4851138591766357, "learning_rate": 0.00019786908421436645, "loss": 1.6989, "step": 2621 }, { "epoch": 0.09389940372804269, "grad_norm": 1.547202229499817, "learning_rate": 0.00019786670182261534, "loss": 1.8292, "step": 2622 }, { "epoch": 0.09393521585761097, "grad_norm": 1.8076845407485962, "learning_rate": 0.00019786431811419402, "loss": 1.5035, "step": 2623 }, { "epoch": 0.09397102798717925, "grad_norm": 2.3341751098632812, "learning_rate": 0.0001978619330891345, "loss": 2.0307, "step": 2624 }, { "epoch": 0.09400684011674754, "grad_norm": 2.7499022483825684, "learning_rate": 0.0001978595467474689, "loss": 1.51, "step": 2625 }, { "epoch": 0.09404265224631583, "grad_norm": 1.925464153289795, "learning_rate": 0.00019785715908922938, "loss": 1.4552, "step": 2626 }, { "epoch": 0.09407846437588412, "grad_norm": 2.460425853729248, "learning_rate": 0.00019785477011444798, "loss": 1.5344, "step": 2627 }, { "epoch": 0.0941142765054524, "grad_norm": 1.5089377164840698, "learning_rate": 0.00019785237982315686, "loss": 1.6439, "step": 2628 }, { "epoch": 0.09415008863502068, "grad_norm": 1.501597285270691, "learning_rate": 0.0001978499882153882, "loss": 1.5609, "step": 2629 }, { "epoch": 0.09418590076458896, "grad_norm": 1.284104824066162, "learning_rate": 0.00019784759529117415, "loss": 1.4742, "step": 2630 }, { "epoch": 0.09422171289415725, "grad_norm": 2.3818297386169434, "learning_rate": 0.000197845201050547, "loss": 1.5001, "step": 2631 }, { "epoch": 0.09425752502372553, "grad_norm": 1.6480205059051514, "learning_rate": 0.0001978428054935388, "loss": 1.5732, "step": 2632 }, { "epoch": 0.09429333715329383, "grad_norm": 2.133923053741455, "learning_rate": 0.00019784040862018184, "loss": 1.5003, "step": 2633 }, { "epoch": 0.09432914928286211, "grad_norm": 1.5461385250091553, "learning_rate": 0.00019783801043050844, "loss": 1.4475, "step": 2634 }, { "epoch": 0.09436496141243039, "grad_norm": 1.5656708478927612, "learning_rate": 0.0001978356109245508, "loss": 1.5606, "step": 2635 }, { "epoch": 0.09440077354199868, "grad_norm": 1.454704999923706, "learning_rate": 0.00019783321010234122, "loss": 1.643, "step": 2636 }, { "epoch": 0.09443658567156696, "grad_norm": 1.9194536209106445, "learning_rate": 0.000197830807963912, "loss": 1.4815, "step": 2637 }, { "epoch": 0.09447239780113524, "grad_norm": 1.1359739303588867, "learning_rate": 0.00019782840450929543, "loss": 1.6183, "step": 2638 }, { "epoch": 0.09450820993070352, "grad_norm": 1.4093936681747437, "learning_rate": 0.00019782599973852387, "loss": 1.7041, "step": 2639 }, { "epoch": 0.09454402206027182, "grad_norm": 1.9672014713287354, "learning_rate": 0.0001978235936516297, "loss": 1.4104, "step": 2640 }, { "epoch": 0.0945798341898401, "grad_norm": 1.2967243194580078, "learning_rate": 0.0001978211862486452, "loss": 1.5651, "step": 2641 }, { "epoch": 0.09461564631940839, "grad_norm": 1.4249886274337769, "learning_rate": 0.00019781877752960285, "loss": 1.6584, "step": 2642 }, { "epoch": 0.09465145844897667, "grad_norm": 1.4482895135879517, "learning_rate": 0.00019781636749453504, "loss": 1.6222, "step": 2643 }, { "epoch": 0.09468727057854495, "grad_norm": 1.9295979738235474, "learning_rate": 0.00019781395614347415, "loss": 1.778, "step": 2644 }, { "epoch": 0.09472308270811323, "grad_norm": 1.6828997135162354, "learning_rate": 0.0001978115434764527, "loss": 1.602, "step": 2645 }, { "epoch": 0.09475889483768152, "grad_norm": 1.602980375289917, "learning_rate": 0.00019780912949350307, "loss": 1.5946, "step": 2646 }, { "epoch": 0.09479470696724981, "grad_norm": 1.2852801084518433, "learning_rate": 0.00019780671419465776, "loss": 1.3936, "step": 2647 }, { "epoch": 0.0948305190968181, "grad_norm": 3.3350985050201416, "learning_rate": 0.00019780429757994928, "loss": 1.587, "step": 2648 }, { "epoch": 0.09486633122638638, "grad_norm": 1.130829930305481, "learning_rate": 0.00019780187964941011, "loss": 1.479, "step": 2649 }, { "epoch": 0.09490214335595466, "grad_norm": 1.76201593875885, "learning_rate": 0.00019779946040307284, "loss": 1.5925, "step": 2650 }, { "epoch": 0.09493795548552295, "grad_norm": 2.2327420711517334, "learning_rate": 0.00019779703984096998, "loss": 1.6929, "step": 2651 }, { "epoch": 0.09497376761509123, "grad_norm": 2.0052194595336914, "learning_rate": 0.00019779461796313408, "loss": 1.4586, "step": 2652 }, { "epoch": 0.09500957974465951, "grad_norm": 1.6383410692214966, "learning_rate": 0.00019779219476959777, "loss": 1.4045, "step": 2653 }, { "epoch": 0.0950453918742278, "grad_norm": 2.5757429599761963, "learning_rate": 0.0001977897702603936, "loss": 1.7264, "step": 2654 }, { "epoch": 0.09508120400379609, "grad_norm": 2.200880289077759, "learning_rate": 0.0001977873444355542, "loss": 1.7437, "step": 2655 }, { "epoch": 0.09511701613336437, "grad_norm": 2.070451498031616, "learning_rate": 0.00019778491729511224, "loss": 1.634, "step": 2656 }, { "epoch": 0.09515282826293266, "grad_norm": 1.4518322944641113, "learning_rate": 0.00019778248883910035, "loss": 1.7425, "step": 2657 }, { "epoch": 0.09518864039250094, "grad_norm": 1.1998945474624634, "learning_rate": 0.0001977800590675512, "loss": 1.4792, "step": 2658 }, { "epoch": 0.09522445252206922, "grad_norm": 1.4403979778289795, "learning_rate": 0.0001977776279804975, "loss": 1.5377, "step": 2659 }, { "epoch": 0.0952602646516375, "grad_norm": 1.753049373626709, "learning_rate": 0.0001977751955779719, "loss": 1.1242, "step": 2660 }, { "epoch": 0.09529607678120579, "grad_norm": 2.60457706451416, "learning_rate": 0.00019777276186000716, "loss": 1.3977, "step": 2661 }, { "epoch": 0.09533188891077408, "grad_norm": 2.1619999408721924, "learning_rate": 0.00019777032682663607, "loss": 1.8488, "step": 2662 }, { "epoch": 0.09536770104034237, "grad_norm": 1.300704002380371, "learning_rate": 0.00019776789047789133, "loss": 1.7853, "step": 2663 }, { "epoch": 0.09540351316991065, "grad_norm": 1.6537222862243652, "learning_rate": 0.0001977654528138057, "loss": 1.5626, "step": 2664 }, { "epoch": 0.09543932529947893, "grad_norm": 1.322972059249878, "learning_rate": 0.00019776301383441207, "loss": 1.5224, "step": 2665 }, { "epoch": 0.09547513742904722, "grad_norm": 1.761858582496643, "learning_rate": 0.00019776057353974315, "loss": 1.7065, "step": 2666 }, { "epoch": 0.0955109495586155, "grad_norm": 1.886605143547058, "learning_rate": 0.00019775813192983183, "loss": 1.2394, "step": 2667 }, { "epoch": 0.09554676168818378, "grad_norm": 2.51098370552063, "learning_rate": 0.00019775568900471096, "loss": 1.3918, "step": 2668 }, { "epoch": 0.09558257381775208, "grad_norm": 2.1192142963409424, "learning_rate": 0.00019775324476441336, "loss": 1.4485, "step": 2669 }, { "epoch": 0.09561838594732036, "grad_norm": 2.188304901123047, "learning_rate": 0.00019775079920897196, "loss": 1.7083, "step": 2670 }, { "epoch": 0.09565419807688864, "grad_norm": 1.3558300733566284, "learning_rate": 0.00019774835233841965, "loss": 1.7486, "step": 2671 }, { "epoch": 0.09569001020645693, "grad_norm": 1.7164361476898193, "learning_rate": 0.00019774590415278933, "loss": 1.4583, "step": 2672 }, { "epoch": 0.09572582233602521, "grad_norm": 1.419765830039978, "learning_rate": 0.00019774345465211398, "loss": 2.0221, "step": 2673 }, { "epoch": 0.09576163446559349, "grad_norm": 1.728421926498413, "learning_rate": 0.00019774100383642651, "loss": 1.3956, "step": 2674 }, { "epoch": 0.09579744659516178, "grad_norm": 2.3539481163024902, "learning_rate": 0.0001977385517057599, "loss": 1.6057, "step": 2675 }, { "epoch": 0.09583325872473007, "grad_norm": 2.140263319015503, "learning_rate": 0.00019773609826014718, "loss": 1.7721, "step": 2676 }, { "epoch": 0.09586907085429835, "grad_norm": 1.8717950582504272, "learning_rate": 0.0001977336434996213, "loss": 1.3382, "step": 2677 }, { "epoch": 0.09590488298386664, "grad_norm": 1.4856847524642944, "learning_rate": 0.00019773118742421532, "loss": 1.4095, "step": 2678 }, { "epoch": 0.09594069511343492, "grad_norm": 2.1832220554351807, "learning_rate": 0.00019772873003396228, "loss": 1.6058, "step": 2679 }, { "epoch": 0.0959765072430032, "grad_norm": 1.603605031967163, "learning_rate": 0.00019772627132889526, "loss": 1.5258, "step": 2680 }, { "epoch": 0.09601231937257149, "grad_norm": 1.6799068450927734, "learning_rate": 0.00019772381130904728, "loss": 1.6569, "step": 2681 }, { "epoch": 0.09604813150213977, "grad_norm": 1.3893667459487915, "learning_rate": 0.0001977213499744515, "loss": 1.6977, "step": 2682 }, { "epoch": 0.09608394363170807, "grad_norm": 2.7440836429595947, "learning_rate": 0.00019771888732514098, "loss": 1.495, "step": 2683 }, { "epoch": 0.09611975576127635, "grad_norm": 1.2910890579223633, "learning_rate": 0.00019771642336114892, "loss": 1.1927, "step": 2684 }, { "epoch": 0.09615556789084463, "grad_norm": 1.6925691366195679, "learning_rate": 0.0001977139580825084, "loss": 1.2957, "step": 2685 }, { "epoch": 0.09619138002041291, "grad_norm": 2.3331246376037598, "learning_rate": 0.0001977114914892526, "loss": 1.7952, "step": 2686 }, { "epoch": 0.0962271921499812, "grad_norm": 2.0909788608551025, "learning_rate": 0.00019770902358141478, "loss": 1.4007, "step": 2687 }, { "epoch": 0.09626300427954948, "grad_norm": 1.5967774391174316, "learning_rate": 0.00019770655435902805, "loss": 1.4887, "step": 2688 }, { "epoch": 0.09629881640911776, "grad_norm": 1.9621723890304565, "learning_rate": 0.00019770408382212564, "loss": 1.1734, "step": 2689 }, { "epoch": 0.09633462853868606, "grad_norm": 1.2921861410140991, "learning_rate": 0.00019770161197074084, "loss": 1.6445, "step": 2690 }, { "epoch": 0.09637044066825434, "grad_norm": 1.2130435705184937, "learning_rate": 0.00019769913880490688, "loss": 1.6201, "step": 2691 }, { "epoch": 0.09640625279782263, "grad_norm": 1.7521440982818604, "learning_rate": 0.000197696664324657, "loss": 1.6516, "step": 2692 }, { "epoch": 0.09644206492739091, "grad_norm": 1.5702006816864014, "learning_rate": 0.00019769418853002454, "loss": 1.2543, "step": 2693 }, { "epoch": 0.09647787705695919, "grad_norm": 1.7723594903945923, "learning_rate": 0.0001976917114210428, "loss": 1.5552, "step": 2694 }, { "epoch": 0.09651368918652747, "grad_norm": 1.4246082305908203, "learning_rate": 0.00019768923299774506, "loss": 1.6263, "step": 2695 }, { "epoch": 0.09654950131609576, "grad_norm": 1.6311163902282715, "learning_rate": 0.00019768675326016475, "loss": 1.5833, "step": 2696 }, { "epoch": 0.09658531344566405, "grad_norm": 1.1841009855270386, "learning_rate": 0.00019768427220833514, "loss": 1.3689, "step": 2697 }, { "epoch": 0.09662112557523234, "grad_norm": 1.5306531190872192, "learning_rate": 0.00019768178984228967, "loss": 1.8065, "step": 2698 }, { "epoch": 0.09665693770480062, "grad_norm": 1.5561325550079346, "learning_rate": 0.00019767930616206174, "loss": 1.6343, "step": 2699 }, { "epoch": 0.0966927498343689, "grad_norm": 2.01039457321167, "learning_rate": 0.00019767682116768472, "loss": 1.4323, "step": 2700 }, { "epoch": 0.09672856196393718, "grad_norm": 1.3528622388839722, "learning_rate": 0.00019767433485919206, "loss": 1.5957, "step": 2701 }, { "epoch": 0.09676437409350547, "grad_norm": 1.388312816619873, "learning_rate": 0.0001976718472366172, "loss": 1.5354, "step": 2702 }, { "epoch": 0.09680018622307375, "grad_norm": 1.7916830778121948, "learning_rate": 0.00019766935829999363, "loss": 1.7827, "step": 2703 }, { "epoch": 0.09683599835264203, "grad_norm": 2.035825729370117, "learning_rate": 0.00019766686804935488, "loss": 1.52, "step": 2704 }, { "epoch": 0.09687181048221033, "grad_norm": 1.4044291973114014, "learning_rate": 0.00019766437648473435, "loss": 1.778, "step": 2705 }, { "epoch": 0.09690762261177861, "grad_norm": 1.3465924263000488, "learning_rate": 0.00019766188360616563, "loss": 1.6834, "step": 2706 }, { "epoch": 0.0969434347413469, "grad_norm": 1.3629565238952637, "learning_rate": 0.00019765938941368222, "loss": 1.7601, "step": 2707 }, { "epoch": 0.09697924687091518, "grad_norm": 1.55349600315094, "learning_rate": 0.00019765689390731773, "loss": 1.8671, "step": 2708 }, { "epoch": 0.09701505900048346, "grad_norm": 1.5286920070648193, "learning_rate": 0.0001976543970871057, "loss": 1.6034, "step": 2709 }, { "epoch": 0.09705087113005174, "grad_norm": 1.5274664163589478, "learning_rate": 0.0001976518989530797, "loss": 1.7297, "step": 2710 }, { "epoch": 0.09708668325962003, "grad_norm": 1.5408055782318115, "learning_rate": 0.00019764939950527336, "loss": 1.5561, "step": 2711 }, { "epoch": 0.09712249538918832, "grad_norm": 1.8871009349822998, "learning_rate": 0.0001976468987437203, "loss": 1.5792, "step": 2712 }, { "epoch": 0.0971583075187566, "grad_norm": 1.9408537149429321, "learning_rate": 0.0001976443966684542, "loss": 1.6264, "step": 2713 }, { "epoch": 0.09719411964832489, "grad_norm": 1.510097861289978, "learning_rate": 0.00019764189327950869, "loss": 1.4198, "step": 2714 }, { "epoch": 0.09722993177789317, "grad_norm": 2.1380035877227783, "learning_rate": 0.00019763938857691744, "loss": 1.4642, "step": 2715 }, { "epoch": 0.09726574390746145, "grad_norm": 1.4172416925430298, "learning_rate": 0.00019763688256071418, "loss": 1.9416, "step": 2716 }, { "epoch": 0.09730155603702974, "grad_norm": 1.5462101697921753, "learning_rate": 0.0001976343752309326, "loss": 1.573, "step": 2717 }, { "epoch": 0.09733736816659802, "grad_norm": 1.9838913679122925, "learning_rate": 0.00019763186658760645, "loss": 1.7277, "step": 2718 }, { "epoch": 0.09737318029616632, "grad_norm": 1.450179100036621, "learning_rate": 0.00019762935663076946, "loss": 1.7552, "step": 2719 }, { "epoch": 0.0974089924257346, "grad_norm": 1.3784503936767578, "learning_rate": 0.00019762684536045542, "loss": 1.443, "step": 2720 }, { "epoch": 0.09744480455530288, "grad_norm": 2.3167805671691895, "learning_rate": 0.00019762433277669807, "loss": 1.8773, "step": 2721 }, { "epoch": 0.09748061668487117, "grad_norm": 2.927114486694336, "learning_rate": 0.00019762181887953128, "loss": 1.7365, "step": 2722 }, { "epoch": 0.09751642881443945, "grad_norm": 2.3608787059783936, "learning_rate": 0.00019761930366898883, "loss": 1.6947, "step": 2723 }, { "epoch": 0.09755224094400773, "grad_norm": 1.6629176139831543, "learning_rate": 0.0001976167871451046, "loss": 1.6643, "step": 2724 }, { "epoch": 0.09758805307357601, "grad_norm": 1.806070327758789, "learning_rate": 0.00019761426930791238, "loss": 2.0347, "step": 2725 }, { "epoch": 0.09762386520314431, "grad_norm": 2.381452798843384, "learning_rate": 0.00019761175015744605, "loss": 1.5381, "step": 2726 }, { "epoch": 0.0976596773327126, "grad_norm": 1.5285437107086182, "learning_rate": 0.0001976092296937396, "loss": 1.9806, "step": 2727 }, { "epoch": 0.09769548946228088, "grad_norm": 1.324546456336975, "learning_rate": 0.00019760670791682685, "loss": 1.7321, "step": 2728 }, { "epoch": 0.09773130159184916, "grad_norm": 1.9074602127075195, "learning_rate": 0.00019760418482674173, "loss": 1.6216, "step": 2729 }, { "epoch": 0.09776711372141744, "grad_norm": 1.6975016593933105, "learning_rate": 0.0001976016604235182, "loss": 1.7029, "step": 2730 }, { "epoch": 0.09780292585098573, "grad_norm": 1.9731216430664062, "learning_rate": 0.00019759913470719024, "loss": 1.5059, "step": 2731 }, { "epoch": 0.09783873798055401, "grad_norm": 1.32267165184021, "learning_rate": 0.00019759660767779184, "loss": 1.4878, "step": 2732 }, { "epoch": 0.0978745501101223, "grad_norm": 1.2084392309188843, "learning_rate": 0.00019759407933535693, "loss": 1.6624, "step": 2733 }, { "epoch": 0.09791036223969059, "grad_norm": 2.081899404525757, "learning_rate": 0.0001975915496799196, "loss": 1.7721, "step": 2734 }, { "epoch": 0.09794617436925887, "grad_norm": 2.2579267024993896, "learning_rate": 0.00019758901871151383, "loss": 1.3182, "step": 2735 }, { "epoch": 0.09798198649882715, "grad_norm": 1.7588104009628296, "learning_rate": 0.00019758648643017373, "loss": 1.2959, "step": 2736 }, { "epoch": 0.09801779862839544, "grad_norm": 1.9816704988479614, "learning_rate": 0.0001975839528359333, "loss": 1.4635, "step": 2737 }, { "epoch": 0.09805361075796372, "grad_norm": 1.833520770072937, "learning_rate": 0.00019758141792882667, "loss": 1.4191, "step": 2738 }, { "epoch": 0.098089422887532, "grad_norm": 1.7605247497558594, "learning_rate": 0.00019757888170888793, "loss": 1.3866, "step": 2739 }, { "epoch": 0.0981252350171003, "grad_norm": 2.2199249267578125, "learning_rate": 0.0001975763441761512, "loss": 1.7087, "step": 2740 }, { "epoch": 0.09816104714666858, "grad_norm": 1.999975562095642, "learning_rate": 0.00019757380533065065, "loss": 1.8079, "step": 2741 }, { "epoch": 0.09819685927623686, "grad_norm": 1.5539270639419556, "learning_rate": 0.00019757126517242038, "loss": 1.7708, "step": 2742 }, { "epoch": 0.09823267140580515, "grad_norm": 1.6064941883087158, "learning_rate": 0.0001975687237014946, "loss": 1.7572, "step": 2743 }, { "epoch": 0.09826848353537343, "grad_norm": 1.6434097290039062, "learning_rate": 0.0001975661809179075, "loss": 1.6864, "step": 2744 }, { "epoch": 0.09830429566494171, "grad_norm": 1.3108112812042236, "learning_rate": 0.0001975636368216933, "loss": 1.4519, "step": 2745 }, { "epoch": 0.09834010779451, "grad_norm": 1.4596123695373535, "learning_rate": 0.0001975610914128862, "loss": 1.8373, "step": 2746 }, { "epoch": 0.09837591992407829, "grad_norm": 1.9536592960357666, "learning_rate": 0.00019755854469152045, "loss": 1.2695, "step": 2747 }, { "epoch": 0.09841173205364658, "grad_norm": 1.4774552583694458, "learning_rate": 0.00019755599665763037, "loss": 1.4301, "step": 2748 }, { "epoch": 0.09844754418321486, "grad_norm": 1.8306077718734741, "learning_rate": 0.00019755344731125013, "loss": 1.7258, "step": 2749 }, { "epoch": 0.09848335631278314, "grad_norm": 1.3312674760818481, "learning_rate": 0.00019755089665241413, "loss": 1.6966, "step": 2750 }, { "epoch": 0.09851916844235142, "grad_norm": 1.9812297821044922, "learning_rate": 0.00019754834468115664, "loss": 1.7699, "step": 2751 }, { "epoch": 0.0985549805719197, "grad_norm": 1.5077332258224487, "learning_rate": 0.00019754579139751198, "loss": 1.82, "step": 2752 }, { "epoch": 0.09859079270148799, "grad_norm": 1.7618402242660522, "learning_rate": 0.00019754323680151457, "loss": 1.4261, "step": 2753 }, { "epoch": 0.09862660483105627, "grad_norm": 2.45039439201355, "learning_rate": 0.00019754068089319869, "loss": 1.6046, "step": 2754 }, { "epoch": 0.09866241696062457, "grad_norm": 1.6822230815887451, "learning_rate": 0.00019753812367259878, "loss": 1.4612, "step": 2755 }, { "epoch": 0.09869822909019285, "grad_norm": 1.5892387628555298, "learning_rate": 0.00019753556513974922, "loss": 1.5376, "step": 2756 }, { "epoch": 0.09873404121976113, "grad_norm": 1.8028465509414673, "learning_rate": 0.00019753300529468446, "loss": 1.6662, "step": 2757 }, { "epoch": 0.09876985334932942, "grad_norm": 1.4199618101119995, "learning_rate": 0.00019753044413743892, "loss": 1.7844, "step": 2758 }, { "epoch": 0.0988056654788977, "grad_norm": 2.244751453399658, "learning_rate": 0.00019752788166804702, "loss": 1.7328, "step": 2759 }, { "epoch": 0.09884147760846598, "grad_norm": 1.673805594444275, "learning_rate": 0.0001975253178865433, "loss": 1.6421, "step": 2760 }, { "epoch": 0.09887728973803427, "grad_norm": 1.4417093992233276, "learning_rate": 0.00019752275279296227, "loss": 1.6409, "step": 2761 }, { "epoch": 0.09891310186760256, "grad_norm": 1.2416033744812012, "learning_rate": 0.00019752018638733836, "loss": 1.673, "step": 2762 }, { "epoch": 0.09894891399717085, "grad_norm": 1.481547236442566, "learning_rate": 0.00019751761866970612, "loss": 1.7885, "step": 2763 }, { "epoch": 0.09898472612673913, "grad_norm": 1.45943284034729, "learning_rate": 0.00019751504964010016, "loss": 1.9124, "step": 2764 }, { "epoch": 0.09902053825630741, "grad_norm": 1.5683441162109375, "learning_rate": 0.00019751247929855495, "loss": 1.3896, "step": 2765 }, { "epoch": 0.0990563503858757, "grad_norm": 1.5302742719650269, "learning_rate": 0.0001975099076451051, "loss": 1.5202, "step": 2766 }, { "epoch": 0.09909216251544398, "grad_norm": 2.1754040718078613, "learning_rate": 0.00019750733467978525, "loss": 1.6422, "step": 2767 }, { "epoch": 0.09912797464501226, "grad_norm": 2.2145087718963623, "learning_rate": 0.00019750476040262998, "loss": 2.0339, "step": 2768 }, { "epoch": 0.09916378677458056, "grad_norm": 1.463472843170166, "learning_rate": 0.00019750218481367392, "loss": 1.6032, "step": 2769 }, { "epoch": 0.09919959890414884, "grad_norm": 1.4679081439971924, "learning_rate": 0.00019749960791295174, "loss": 1.702, "step": 2770 }, { "epoch": 0.09923541103371712, "grad_norm": 1.6745630502700806, "learning_rate": 0.0001974970297004981, "loss": 1.872, "step": 2771 }, { "epoch": 0.0992712231632854, "grad_norm": 1.3749425411224365, "learning_rate": 0.0001974944501763477, "loss": 1.5595, "step": 2772 }, { "epoch": 0.09930703529285369, "grad_norm": 1.8216643333435059, "learning_rate": 0.0001974918693405352, "loss": 1.5808, "step": 2773 }, { "epoch": 0.09934284742242197, "grad_norm": 1.7504353523254395, "learning_rate": 0.0001974892871930954, "loss": 1.6087, "step": 2774 }, { "epoch": 0.09937865955199025, "grad_norm": 1.5744881629943848, "learning_rate": 0.00019748670373406294, "loss": 1.7731, "step": 2775 }, { "epoch": 0.09941447168155855, "grad_norm": 2.4953949451446533, "learning_rate": 0.00019748411896347267, "loss": 1.6093, "step": 2776 }, { "epoch": 0.09945028381112683, "grad_norm": 1.4541890621185303, "learning_rate": 0.00019748153288135932, "loss": 1.7734, "step": 2777 }, { "epoch": 0.09948609594069512, "grad_norm": 1.250166654586792, "learning_rate": 0.0001974789454877577, "loss": 1.3861, "step": 2778 }, { "epoch": 0.0995219080702634, "grad_norm": 1.8420655727386475, "learning_rate": 0.0001974763567827026, "loss": 1.5565, "step": 2779 }, { "epoch": 0.09955772019983168, "grad_norm": 1.5533993244171143, "learning_rate": 0.00019747376676622878, "loss": 1.5603, "step": 2780 }, { "epoch": 0.09959353232939996, "grad_norm": 1.4248117208480835, "learning_rate": 0.00019747117543837125, "loss": 1.8146, "step": 2781 }, { "epoch": 0.09962934445896825, "grad_norm": 1.520105242729187, "learning_rate": 0.00019746858279916476, "loss": 1.3316, "step": 2782 }, { "epoch": 0.09966515658853654, "grad_norm": 1.8025250434875488, "learning_rate": 0.0001974659888486442, "loss": 1.903, "step": 2783 }, { "epoch": 0.09970096871810483, "grad_norm": 1.3632969856262207, "learning_rate": 0.0001974633935868445, "loss": 1.41, "step": 2784 }, { "epoch": 0.09973678084767311, "grad_norm": 1.2988522052764893, "learning_rate": 0.00019746079701380055, "loss": 1.6359, "step": 2785 }, { "epoch": 0.09977259297724139, "grad_norm": 2.9737942218780518, "learning_rate": 0.00019745819912954732, "loss": 1.5823, "step": 2786 }, { "epoch": 0.09980840510680968, "grad_norm": 1.4505422115325928, "learning_rate": 0.00019745559993411966, "loss": 1.5382, "step": 2787 }, { "epoch": 0.09984421723637796, "grad_norm": 1.6142643690109253, "learning_rate": 0.00019745299942755266, "loss": 1.6859, "step": 2788 }, { "epoch": 0.09988002936594624, "grad_norm": 1.6316615343093872, "learning_rate": 0.00019745039760988127, "loss": 1.6503, "step": 2789 }, { "epoch": 0.09991584149551454, "grad_norm": 2.2728500366210938, "learning_rate": 0.00019744779448114047, "loss": 1.2704, "step": 2790 }, { "epoch": 0.09995165362508282, "grad_norm": 1.512316346168518, "learning_rate": 0.00019744519004136527, "loss": 1.269, "step": 2791 }, { "epoch": 0.0999874657546511, "grad_norm": 1.5691752433776855, "learning_rate": 0.00019744258429059075, "loss": 1.6717, "step": 2792 }, { "epoch": 0.10002327788421939, "grad_norm": 1.5791586637496948, "learning_rate": 0.00019743997722885198, "loss": 1.6228, "step": 2793 }, { "epoch": 0.10005909001378767, "grad_norm": 1.5813857316970825, "learning_rate": 0.00019743736885618395, "loss": 1.3792, "step": 2794 }, { "epoch": 0.10009490214335595, "grad_norm": 2.116391897201538, "learning_rate": 0.00019743475917262187, "loss": 1.5369, "step": 2795 }, { "epoch": 0.10013071427292423, "grad_norm": 1.7743563652038574, "learning_rate": 0.00019743214817820074, "loss": 1.677, "step": 2796 }, { "epoch": 0.10016652640249253, "grad_norm": 1.637967586517334, "learning_rate": 0.00019742953587295573, "loss": 1.6838, "step": 2797 }, { "epoch": 0.10020233853206081, "grad_norm": 1.7225396633148193, "learning_rate": 0.000197426922256922, "loss": 1.7073, "step": 2798 }, { "epoch": 0.1002381506616291, "grad_norm": 1.4051920175552368, "learning_rate": 0.00019742430733013473, "loss": 1.3682, "step": 2799 }, { "epoch": 0.10027396279119738, "grad_norm": 1.5960766077041626, "learning_rate": 0.00019742169109262904, "loss": 1.8122, "step": 2800 }, { "epoch": 0.10030977492076566, "grad_norm": 1.5492359399795532, "learning_rate": 0.00019741907354444018, "loss": 1.656, "step": 2801 }, { "epoch": 0.10034558705033395, "grad_norm": 1.6034247875213623, "learning_rate": 0.00019741645468560336, "loss": 1.8924, "step": 2802 }, { "epoch": 0.10038139917990223, "grad_norm": 2.0048415660858154, "learning_rate": 0.00019741383451615376, "loss": 1.6452, "step": 2803 }, { "epoch": 0.10041721130947051, "grad_norm": 1.9224190711975098, "learning_rate": 0.0001974112130361267, "loss": 1.747, "step": 2804 }, { "epoch": 0.10045302343903881, "grad_norm": 1.7946642637252808, "learning_rate": 0.0001974085902455574, "loss": 1.6644, "step": 2805 }, { "epoch": 0.10048883556860709, "grad_norm": 1.3062515258789062, "learning_rate": 0.0001974059661444812, "loss": 1.6385, "step": 2806 }, { "epoch": 0.10052464769817537, "grad_norm": 1.9280710220336914, "learning_rate": 0.00019740334073293334, "loss": 1.7102, "step": 2807 }, { "epoch": 0.10056045982774366, "grad_norm": 1.6444439888000488, "learning_rate": 0.0001974007140109492, "loss": 1.645, "step": 2808 }, { "epoch": 0.10059627195731194, "grad_norm": 2.063418388366699, "learning_rate": 0.00019739808597856405, "loss": 1.9611, "step": 2809 }, { "epoch": 0.10063208408688022, "grad_norm": 1.3895442485809326, "learning_rate": 0.0001973954566358133, "loss": 1.5975, "step": 2810 }, { "epoch": 0.1006678962164485, "grad_norm": 1.1396634578704834, "learning_rate": 0.0001973928259827323, "loss": 1.5006, "step": 2811 }, { "epoch": 0.1007037083460168, "grad_norm": 2.067750930786133, "learning_rate": 0.0001973901940193565, "loss": 1.4993, "step": 2812 }, { "epoch": 0.10073952047558508, "grad_norm": 2.5891499519348145, "learning_rate": 0.00019738756074572127, "loss": 1.7023, "step": 2813 }, { "epoch": 0.10077533260515337, "grad_norm": 1.5110852718353271, "learning_rate": 0.00019738492616186198, "loss": 1.7311, "step": 2814 }, { "epoch": 0.10081114473472165, "grad_norm": 1.5566610097885132, "learning_rate": 0.00019738229026781414, "loss": 1.4559, "step": 2815 }, { "epoch": 0.10084695686428993, "grad_norm": 1.310876488685608, "learning_rate": 0.00019737965306361322, "loss": 1.5475, "step": 2816 }, { "epoch": 0.10088276899385822, "grad_norm": 1.5883020162582397, "learning_rate": 0.00019737701454929468, "loss": 1.6171, "step": 2817 }, { "epoch": 0.1009185811234265, "grad_norm": 1.3175413608551025, "learning_rate": 0.000197374374724894, "loss": 1.6509, "step": 2818 }, { "epoch": 0.1009543932529948, "grad_norm": 1.287886619567871, "learning_rate": 0.0001973717335904467, "loss": 1.5315, "step": 2819 }, { "epoch": 0.10099020538256308, "grad_norm": 2.7122435569763184, "learning_rate": 0.00019736909114598833, "loss": 1.8696, "step": 2820 }, { "epoch": 0.10102601751213136, "grad_norm": 1.4318008422851562, "learning_rate": 0.00019736644739155445, "loss": 1.4318, "step": 2821 }, { "epoch": 0.10106182964169964, "grad_norm": 1.3406039476394653, "learning_rate": 0.00019736380232718062, "loss": 1.6633, "step": 2822 }, { "epoch": 0.10109764177126793, "grad_norm": 1.6335771083831787, "learning_rate": 0.00019736115595290238, "loss": 1.7371, "step": 2823 }, { "epoch": 0.10113345390083621, "grad_norm": 1.8981465101242065, "learning_rate": 0.00019735850826875542, "loss": 1.596, "step": 2824 }, { "epoch": 0.10116926603040449, "grad_norm": 1.0624903440475464, "learning_rate": 0.0001973558592747753, "loss": 1.4223, "step": 2825 }, { "epoch": 0.10120507815997279, "grad_norm": 1.5947264432907104, "learning_rate": 0.00019735320897099764, "loss": 1.9196, "step": 2826 }, { "epoch": 0.10124089028954107, "grad_norm": 1.1546331644058228, "learning_rate": 0.00019735055735745817, "loss": 1.6058, "step": 2827 }, { "epoch": 0.10127670241910935, "grad_norm": 2.101027011871338, "learning_rate": 0.0001973479044341925, "loss": 1.6675, "step": 2828 }, { "epoch": 0.10131251454867764, "grad_norm": 2.380446672439575, "learning_rate": 0.00019734525020123639, "loss": 1.8698, "step": 2829 }, { "epoch": 0.10134832667824592, "grad_norm": 2.061709403991699, "learning_rate": 0.00019734259465862546, "loss": 1.6063, "step": 2830 }, { "epoch": 0.1013841388078142, "grad_norm": 1.974861741065979, "learning_rate": 0.0001973399378063955, "loss": 1.3884, "step": 2831 }, { "epoch": 0.10141995093738249, "grad_norm": 1.3313992023468018, "learning_rate": 0.00019733727964458221, "loss": 1.4632, "step": 2832 }, { "epoch": 0.10145576306695078, "grad_norm": 1.6623539924621582, "learning_rate": 0.00019733462017322142, "loss": 1.8361, "step": 2833 }, { "epoch": 0.10149157519651907, "grad_norm": 1.7105774879455566, "learning_rate": 0.00019733195939234882, "loss": 1.7204, "step": 2834 }, { "epoch": 0.10152738732608735, "grad_norm": 1.2900364398956299, "learning_rate": 0.00019732929730200031, "loss": 1.6921, "step": 2835 }, { "epoch": 0.10156319945565563, "grad_norm": 1.5834048986434937, "learning_rate": 0.00019732663390221162, "loss": 1.876, "step": 2836 }, { "epoch": 0.10159901158522391, "grad_norm": 1.4001473188400269, "learning_rate": 0.0001973239691930186, "loss": 1.6452, "step": 2837 }, { "epoch": 0.1016348237147922, "grad_norm": 2.6770882606506348, "learning_rate": 0.00019732130317445714, "loss": 1.6828, "step": 2838 }, { "epoch": 0.10167063584436048, "grad_norm": 1.2641379833221436, "learning_rate": 0.00019731863584656308, "loss": 1.5711, "step": 2839 }, { "epoch": 0.10170644797392878, "grad_norm": 1.3252092599868774, "learning_rate": 0.0001973159672093723, "loss": 1.8273, "step": 2840 }, { "epoch": 0.10174226010349706, "grad_norm": 1.4117766618728638, "learning_rate": 0.00019731329726292073, "loss": 1.4361, "step": 2841 }, { "epoch": 0.10177807223306534, "grad_norm": 1.7400825023651123, "learning_rate": 0.00019731062600724424, "loss": 1.5425, "step": 2842 }, { "epoch": 0.10181388436263362, "grad_norm": 1.2664161920547485, "learning_rate": 0.0001973079534423788, "loss": 1.7458, "step": 2843 }, { "epoch": 0.10184969649220191, "grad_norm": 1.4911015033721924, "learning_rate": 0.00019730527956836035, "loss": 1.6534, "step": 2844 }, { "epoch": 0.10188550862177019, "grad_norm": 1.494632601737976, "learning_rate": 0.00019730260438522492, "loss": 1.4687, "step": 2845 }, { "epoch": 0.10192132075133847, "grad_norm": 2.710315227508545, "learning_rate": 0.00019729992789300845, "loss": 1.6687, "step": 2846 }, { "epoch": 0.10195713288090676, "grad_norm": 1.292092204093933, "learning_rate": 0.00019729725009174693, "loss": 1.6175, "step": 2847 }, { "epoch": 0.10199294501047505, "grad_norm": 1.6304455995559692, "learning_rate": 0.00019729457098147647, "loss": 1.4029, "step": 2848 }, { "epoch": 0.10202875714004334, "grad_norm": 1.7625586986541748, "learning_rate": 0.000197291890562233, "loss": 1.9333, "step": 2849 }, { "epoch": 0.10206456926961162, "grad_norm": 2.0807228088378906, "learning_rate": 0.00019728920883405263, "loss": 1.5159, "step": 2850 }, { "epoch": 0.1021003813991799, "grad_norm": 1.6510517597198486, "learning_rate": 0.00019728652579697152, "loss": 1.5753, "step": 2851 }, { "epoch": 0.10213619352874818, "grad_norm": 1.9343862533569336, "learning_rate": 0.00019728384145102564, "loss": 1.615, "step": 2852 }, { "epoch": 0.10217200565831647, "grad_norm": 1.403560996055603, "learning_rate": 0.00019728115579625117, "loss": 1.4167, "step": 2853 }, { "epoch": 0.10220781778788475, "grad_norm": 1.5644052028656006, "learning_rate": 0.00019727846883268427, "loss": 1.7456, "step": 2854 }, { "epoch": 0.10224362991745305, "grad_norm": 1.6829103231430054, "learning_rate": 0.00019727578056036101, "loss": 1.7578, "step": 2855 }, { "epoch": 0.10227944204702133, "grad_norm": 1.861466646194458, "learning_rate": 0.0001972730909793176, "loss": 1.5786, "step": 2856 }, { "epoch": 0.10231525417658961, "grad_norm": 1.6972744464874268, "learning_rate": 0.00019727040008959024, "loss": 1.6002, "step": 2857 }, { "epoch": 0.1023510663061579, "grad_norm": 1.5097100734710693, "learning_rate": 0.00019726770789121512, "loss": 1.5889, "step": 2858 }, { "epoch": 0.10238687843572618, "grad_norm": 1.354004979133606, "learning_rate": 0.00019726501438422842, "loss": 1.6782, "step": 2859 }, { "epoch": 0.10242269056529446, "grad_norm": 1.700211763381958, "learning_rate": 0.00019726231956866645, "loss": 1.3758, "step": 2860 }, { "epoch": 0.10245850269486274, "grad_norm": 2.7783384323120117, "learning_rate": 0.00019725962344456543, "loss": 1.5725, "step": 2861 }, { "epoch": 0.10249431482443104, "grad_norm": 1.8978086709976196, "learning_rate": 0.00019725692601196162, "loss": 1.6209, "step": 2862 }, { "epoch": 0.10253012695399932, "grad_norm": 2.073197841644287, "learning_rate": 0.00019725422727089132, "loss": 1.3348, "step": 2863 }, { "epoch": 0.1025659390835676, "grad_norm": 1.9262858629226685, "learning_rate": 0.00019725152722139085, "loss": 1.8382, "step": 2864 }, { "epoch": 0.10260175121313589, "grad_norm": 1.9772917032241821, "learning_rate": 0.00019724882586349653, "loss": 1.7399, "step": 2865 }, { "epoch": 0.10263756334270417, "grad_norm": 1.9056910276412964, "learning_rate": 0.00019724612319724468, "loss": 1.5189, "step": 2866 }, { "epoch": 0.10267337547227245, "grad_norm": 1.3025951385498047, "learning_rate": 0.0001972434192226717, "loss": 1.6968, "step": 2867 }, { "epoch": 0.10270918760184074, "grad_norm": 1.772749900817871, "learning_rate": 0.00019724071393981393, "loss": 1.3956, "step": 2868 }, { "epoch": 0.10274499973140903, "grad_norm": 2.921922445297241, "learning_rate": 0.00019723800734870782, "loss": 1.7918, "step": 2869 }, { "epoch": 0.10278081186097732, "grad_norm": 1.5922387838363647, "learning_rate": 0.00019723529944938974, "loss": 1.8264, "step": 2870 }, { "epoch": 0.1028166239905456, "grad_norm": 1.159803867340088, "learning_rate": 0.00019723259024189612, "loss": 1.4858, "step": 2871 }, { "epoch": 0.10285243612011388, "grad_norm": 1.4431113004684448, "learning_rate": 0.0001972298797262634, "loss": 1.6408, "step": 2872 }, { "epoch": 0.10288824824968217, "grad_norm": 1.711513876914978, "learning_rate": 0.0001972271679025281, "loss": 1.6958, "step": 2873 }, { "epoch": 0.10292406037925045, "grad_norm": 1.448328971862793, "learning_rate": 0.00019722445477072666, "loss": 1.4791, "step": 2874 }, { "epoch": 0.10295987250881873, "grad_norm": 1.213527798652649, "learning_rate": 0.00019722174033089557, "loss": 1.5364, "step": 2875 }, { "epoch": 0.10299568463838703, "grad_norm": 1.4906288385391235, "learning_rate": 0.0001972190245830714, "loss": 1.4892, "step": 2876 }, { "epoch": 0.10303149676795531, "grad_norm": 1.639464020729065, "learning_rate": 0.00019721630752729064, "loss": 1.7839, "step": 2877 }, { "epoch": 0.1030673088975236, "grad_norm": 2.0337164402008057, "learning_rate": 0.00019721358916358986, "loss": 1.5573, "step": 2878 }, { "epoch": 0.10310312102709188, "grad_norm": 1.2845540046691895, "learning_rate": 0.00019721086949200565, "loss": 1.6272, "step": 2879 }, { "epoch": 0.10313893315666016, "grad_norm": 1.317577838897705, "learning_rate": 0.00019720814851257457, "loss": 1.5472, "step": 2880 }, { "epoch": 0.10317474528622844, "grad_norm": 1.3626635074615479, "learning_rate": 0.00019720542622533323, "loss": 1.6389, "step": 2881 }, { "epoch": 0.10321055741579672, "grad_norm": 1.4564090967178345, "learning_rate": 0.0001972027026303183, "loss": 1.5697, "step": 2882 }, { "epoch": 0.10324636954536502, "grad_norm": 1.459882140159607, "learning_rate": 0.00019719997772756637, "loss": 1.4884, "step": 2883 }, { "epoch": 0.1032821816749333, "grad_norm": 1.6414357423782349, "learning_rate": 0.00019719725151711413, "loss": 2.0682, "step": 2884 }, { "epoch": 0.10331799380450159, "grad_norm": 1.225942850112915, "learning_rate": 0.00019719452399899823, "loss": 1.3942, "step": 2885 }, { "epoch": 0.10335380593406987, "grad_norm": 1.67031991481781, "learning_rate": 0.00019719179517325538, "loss": 1.4331, "step": 2886 }, { "epoch": 0.10338961806363815, "grad_norm": 2.3105432987213135, "learning_rate": 0.00019718906503992233, "loss": 1.7211, "step": 2887 }, { "epoch": 0.10342543019320644, "grad_norm": 1.6651228666305542, "learning_rate": 0.00019718633359903573, "loss": 1.3568, "step": 2888 }, { "epoch": 0.10346124232277472, "grad_norm": 2.5842862129211426, "learning_rate": 0.00019718360085063238, "loss": 1.6142, "step": 2889 }, { "epoch": 0.10349705445234302, "grad_norm": 1.3132528066635132, "learning_rate": 0.00019718086679474905, "loss": 1.5248, "step": 2890 }, { "epoch": 0.1035328665819113, "grad_norm": 1.5135085582733154, "learning_rate": 0.0001971781314314225, "loss": 1.4578, "step": 2891 }, { "epoch": 0.10356867871147958, "grad_norm": 1.8713313341140747, "learning_rate": 0.00019717539476068959, "loss": 1.5403, "step": 2892 }, { "epoch": 0.10360449084104786, "grad_norm": 1.3445090055465698, "learning_rate": 0.00019717265678258702, "loss": 1.6932, "step": 2893 }, { "epoch": 0.10364030297061615, "grad_norm": 2.2110817432403564, "learning_rate": 0.00019716991749715174, "loss": 1.6663, "step": 2894 }, { "epoch": 0.10367611510018443, "grad_norm": 1.4373912811279297, "learning_rate": 0.00019716717690442055, "loss": 1.6872, "step": 2895 }, { "epoch": 0.10371192722975271, "grad_norm": 1.3239036798477173, "learning_rate": 0.00019716443500443034, "loss": 1.4225, "step": 2896 }, { "epoch": 0.103747739359321, "grad_norm": 1.6182363033294678, "learning_rate": 0.00019716169179721799, "loss": 1.7373, "step": 2897 }, { "epoch": 0.10378355148888929, "grad_norm": 1.3850167989730835, "learning_rate": 0.00019715894728282037, "loss": 1.5345, "step": 2898 }, { "epoch": 0.10381936361845757, "grad_norm": 1.4673134088516235, "learning_rate": 0.00019715620146127448, "loss": 1.5932, "step": 2899 }, { "epoch": 0.10385517574802586, "grad_norm": 1.7944717407226562, "learning_rate": 0.0001971534543326172, "loss": 1.6443, "step": 2900 }, { "epoch": 0.10389098787759414, "grad_norm": 1.4910426139831543, "learning_rate": 0.0001971507058968855, "loss": 1.7324, "step": 2901 }, { "epoch": 0.10392680000716242, "grad_norm": 2.181478261947632, "learning_rate": 0.00019714795615411644, "loss": 1.6161, "step": 2902 }, { "epoch": 0.1039626121367307, "grad_norm": 1.2145978212356567, "learning_rate": 0.00019714520510434686, "loss": 1.5633, "step": 2903 }, { "epoch": 0.10399842426629899, "grad_norm": 1.4202697277069092, "learning_rate": 0.0001971424527476139, "loss": 1.3827, "step": 2904 }, { "epoch": 0.10403423639586729, "grad_norm": 2.2743844985961914, "learning_rate": 0.0001971396990839545, "loss": 1.6353, "step": 2905 }, { "epoch": 0.10407004852543557, "grad_norm": 1.5119394063949585, "learning_rate": 0.0001971369441134058, "loss": 1.4954, "step": 2906 }, { "epoch": 0.10410586065500385, "grad_norm": 2.039295196533203, "learning_rate": 0.00019713418783600477, "loss": 1.4147, "step": 2907 }, { "epoch": 0.10414167278457213, "grad_norm": 1.9588508605957031, "learning_rate": 0.00019713143025178856, "loss": 1.7196, "step": 2908 }, { "epoch": 0.10417748491414042, "grad_norm": 1.776458978652954, "learning_rate": 0.00019712867136079427, "loss": 1.5977, "step": 2909 }, { "epoch": 0.1042132970437087, "grad_norm": 1.6060348749160767, "learning_rate": 0.00019712591116305896, "loss": 1.8558, "step": 2910 }, { "epoch": 0.10424910917327698, "grad_norm": 1.828564167022705, "learning_rate": 0.0001971231496586198, "loss": 1.6377, "step": 2911 }, { "epoch": 0.10428492130284528, "grad_norm": 2.2467479705810547, "learning_rate": 0.00019712038684751394, "loss": 1.6065, "step": 2912 }, { "epoch": 0.10432073343241356, "grad_norm": 1.4483753442764282, "learning_rate": 0.0001971176227297786, "loss": 1.4403, "step": 2913 }, { "epoch": 0.10435654556198184, "grad_norm": 1.480074405670166, "learning_rate": 0.00019711485730545086, "loss": 1.4702, "step": 2914 }, { "epoch": 0.10439235769155013, "grad_norm": 2.7046217918395996, "learning_rate": 0.000197112090574568, "loss": 1.5385, "step": 2915 }, { "epoch": 0.10442816982111841, "grad_norm": 1.4356821775436401, "learning_rate": 0.00019710932253716722, "loss": 1.2486, "step": 2916 }, { "epoch": 0.1044639819506867, "grad_norm": 1.5573153495788574, "learning_rate": 0.00019710655319328578, "loss": 1.8457, "step": 2917 }, { "epoch": 0.10449979408025498, "grad_norm": 1.470349669456482, "learning_rate": 0.00019710378254296092, "loss": 2.0201, "step": 2918 }, { "epoch": 0.10453560620982327, "grad_norm": 1.6238548755645752, "learning_rate": 0.00019710101058622993, "loss": 1.279, "step": 2919 }, { "epoch": 0.10457141833939156, "grad_norm": 1.5568904876708984, "learning_rate": 0.00019709823732313008, "loss": 1.6239, "step": 2920 }, { "epoch": 0.10460723046895984, "grad_norm": 2.827681064605713, "learning_rate": 0.0001970954627536987, "loss": 1.8256, "step": 2921 }, { "epoch": 0.10464304259852812, "grad_norm": 1.9860001802444458, "learning_rate": 0.00019709268687797312, "loss": 1.6688, "step": 2922 }, { "epoch": 0.1046788547280964, "grad_norm": 1.3693649768829346, "learning_rate": 0.00019708990969599067, "loss": 1.4567, "step": 2923 }, { "epoch": 0.10471466685766469, "grad_norm": 1.5036529302597046, "learning_rate": 0.00019708713120778873, "loss": 1.7974, "step": 2924 }, { "epoch": 0.10475047898723297, "grad_norm": 1.446396827697754, "learning_rate": 0.00019708435141340465, "loss": 1.5247, "step": 2925 }, { "epoch": 0.10478629111680127, "grad_norm": 1.1922216415405273, "learning_rate": 0.00019708157031287588, "loss": 1.8338, "step": 2926 }, { "epoch": 0.10482210324636955, "grad_norm": 1.8486875295639038, "learning_rate": 0.0001970787879062398, "loss": 1.359, "step": 2927 }, { "epoch": 0.10485791537593783, "grad_norm": 1.4665194749832153, "learning_rate": 0.00019707600419353383, "loss": 1.6474, "step": 2928 }, { "epoch": 0.10489372750550612, "grad_norm": 1.7647480964660645, "learning_rate": 0.00019707321917479547, "loss": 1.6358, "step": 2929 }, { "epoch": 0.1049295396350744, "grad_norm": 1.694216012954712, "learning_rate": 0.00019707043285006214, "loss": 1.618, "step": 2930 }, { "epoch": 0.10496535176464268, "grad_norm": 1.8111251592636108, "learning_rate": 0.00019706764521937138, "loss": 1.6625, "step": 2931 }, { "epoch": 0.10500116389421096, "grad_norm": 1.5972100496292114, "learning_rate": 0.00019706485628276062, "loss": 1.5874, "step": 2932 }, { "epoch": 0.10503697602377926, "grad_norm": 1.9588450193405151, "learning_rate": 0.00019706206604026746, "loss": 1.5872, "step": 2933 }, { "epoch": 0.10507278815334754, "grad_norm": 1.1568310260772705, "learning_rate": 0.00019705927449192937, "loss": 1.4907, "step": 2934 }, { "epoch": 0.10510860028291583, "grad_norm": 1.5908012390136719, "learning_rate": 0.00019705648163778397, "loss": 1.5814, "step": 2935 }, { "epoch": 0.10514441241248411, "grad_norm": 1.7401422262191772, "learning_rate": 0.00019705368747786878, "loss": 1.9683, "step": 2936 }, { "epoch": 0.10518022454205239, "grad_norm": 1.7295998334884644, "learning_rate": 0.00019705089201222143, "loss": 1.3394, "step": 2937 }, { "epoch": 0.10521603667162067, "grad_norm": 1.5581072568893433, "learning_rate": 0.00019704809524087952, "loss": 1.7824, "step": 2938 }, { "epoch": 0.10525184880118896, "grad_norm": 1.2611238956451416, "learning_rate": 0.00019704529716388068, "loss": 1.5758, "step": 2939 }, { "epoch": 0.10528766093075725, "grad_norm": 1.469044804573059, "learning_rate": 0.00019704249778126253, "loss": 1.5392, "step": 2940 }, { "epoch": 0.10532347306032554, "grad_norm": 1.7068703174591064, "learning_rate": 0.00019703969709306273, "loss": 1.6083, "step": 2941 }, { "epoch": 0.10535928518989382, "grad_norm": 1.3602216243743896, "learning_rate": 0.000197036895099319, "loss": 1.9103, "step": 2942 }, { "epoch": 0.1053950973194621, "grad_norm": 1.29122793674469, "learning_rate": 0.000197034091800069, "loss": 1.9111, "step": 2943 }, { "epoch": 0.10543090944903039, "grad_norm": 1.4069241285324097, "learning_rate": 0.00019703128719535047, "loss": 1.6298, "step": 2944 }, { "epoch": 0.10546672157859867, "grad_norm": 1.6360414028167725, "learning_rate": 0.00019702848128520112, "loss": 1.5577, "step": 2945 }, { "epoch": 0.10550253370816695, "grad_norm": 2.210772752761841, "learning_rate": 0.00019702567406965874, "loss": 1.8814, "step": 2946 }, { "epoch": 0.10553834583773523, "grad_norm": 1.9793405532836914, "learning_rate": 0.00019702286554876107, "loss": 1.8386, "step": 2947 }, { "epoch": 0.10557415796730353, "grad_norm": 1.657800316810608, "learning_rate": 0.00019702005572254586, "loss": 1.603, "step": 2948 }, { "epoch": 0.10560997009687181, "grad_norm": 2.0030272006988525, "learning_rate": 0.00019701724459105096, "loss": 1.615, "step": 2949 }, { "epoch": 0.1056457822264401, "grad_norm": 1.4756288528442383, "learning_rate": 0.0001970144321543142, "loss": 1.713, "step": 2950 }, { "epoch": 0.10568159435600838, "grad_norm": 1.9561413526535034, "learning_rate": 0.00019701161841237337, "loss": 1.5548, "step": 2951 }, { "epoch": 0.10571740648557666, "grad_norm": 2.031585216522217, "learning_rate": 0.00019700880336526635, "loss": 1.9009, "step": 2952 }, { "epoch": 0.10575321861514494, "grad_norm": 1.3856836557388306, "learning_rate": 0.000197005987013031, "loss": 1.9266, "step": 2953 }, { "epoch": 0.10578903074471323, "grad_norm": 1.4809298515319824, "learning_rate": 0.00019700316935570525, "loss": 1.7208, "step": 2954 }, { "epoch": 0.10582484287428152, "grad_norm": 1.3451876640319824, "learning_rate": 0.00019700035039332697, "loss": 1.6572, "step": 2955 }, { "epoch": 0.10586065500384981, "grad_norm": 1.6890588998794556, "learning_rate": 0.00019699753012593412, "loss": 1.6136, "step": 2956 }, { "epoch": 0.10589646713341809, "grad_norm": 1.5177700519561768, "learning_rate": 0.0001969947085535646, "loss": 1.2336, "step": 2957 }, { "epoch": 0.10593227926298637, "grad_norm": 2.2147626876831055, "learning_rate": 0.00019699188567625639, "loss": 1.7097, "step": 2958 }, { "epoch": 0.10596809139255466, "grad_norm": 1.2537357807159424, "learning_rate": 0.00019698906149404746, "loss": 1.6603, "step": 2959 }, { "epoch": 0.10600390352212294, "grad_norm": 1.5088328123092651, "learning_rate": 0.00019698623600697583, "loss": 1.6384, "step": 2960 }, { "epoch": 0.10603971565169122, "grad_norm": 1.7185791730880737, "learning_rate": 0.0001969834092150795, "loss": 1.9186, "step": 2961 }, { "epoch": 0.10607552778125952, "grad_norm": 1.3850542306900024, "learning_rate": 0.0001969805811183965, "loss": 1.7726, "step": 2962 }, { "epoch": 0.1061113399108278, "grad_norm": 1.59743332862854, "learning_rate": 0.00019697775171696486, "loss": 1.6747, "step": 2963 }, { "epoch": 0.10614715204039608, "grad_norm": 1.8517948389053345, "learning_rate": 0.00019697492101082266, "loss": 1.6965, "step": 2964 }, { "epoch": 0.10618296416996437, "grad_norm": 1.5334497690200806, "learning_rate": 0.000196972089000008, "loss": 1.6025, "step": 2965 }, { "epoch": 0.10621877629953265, "grad_norm": 1.2686636447906494, "learning_rate": 0.00019696925568455894, "loss": 1.5366, "step": 2966 }, { "epoch": 0.10625458842910093, "grad_norm": 1.518660306930542, "learning_rate": 0.00019696642106451368, "loss": 1.8495, "step": 2967 }, { "epoch": 0.10629040055866922, "grad_norm": 1.365371584892273, "learning_rate": 0.00019696358513991027, "loss": 1.8094, "step": 2968 }, { "epoch": 0.10632621268823751, "grad_norm": 1.5992499589920044, "learning_rate": 0.0001969607479107869, "loss": 1.6623, "step": 2969 }, { "epoch": 0.1063620248178058, "grad_norm": 1.7085697650909424, "learning_rate": 0.00019695790937718176, "loss": 1.8765, "step": 2970 }, { "epoch": 0.10639783694737408, "grad_norm": 2.420846700668335, "learning_rate": 0.00019695506953913298, "loss": 2.0682, "step": 2971 }, { "epoch": 0.10643364907694236, "grad_norm": 2.160022735595703, "learning_rate": 0.0001969522283966788, "loss": 1.9651, "step": 2972 }, { "epoch": 0.10646946120651064, "grad_norm": 1.9768903255462646, "learning_rate": 0.00019694938594985747, "loss": 1.5112, "step": 2973 }, { "epoch": 0.10650527333607893, "grad_norm": 1.8344073295593262, "learning_rate": 0.00019694654219870722, "loss": 1.7646, "step": 2974 }, { "epoch": 0.10654108546564721, "grad_norm": 1.4202852249145508, "learning_rate": 0.00019694369714326625, "loss": 1.6141, "step": 2975 }, { "epoch": 0.1065768975952155, "grad_norm": 1.8785535097122192, "learning_rate": 0.00019694085078357293, "loss": 1.7087, "step": 2976 }, { "epoch": 0.10661270972478379, "grad_norm": 1.394516944885254, "learning_rate": 0.00019693800311966549, "loss": 1.5792, "step": 2977 }, { "epoch": 0.10664852185435207, "grad_norm": 1.6537799835205078, "learning_rate": 0.00019693515415158223, "loss": 1.6773, "step": 2978 }, { "epoch": 0.10668433398392035, "grad_norm": 1.922308325767517, "learning_rate": 0.00019693230387936154, "loss": 1.7615, "step": 2979 }, { "epoch": 0.10672014611348864, "grad_norm": 1.586806058883667, "learning_rate": 0.00019692945230304174, "loss": 1.7742, "step": 2980 }, { "epoch": 0.10675595824305692, "grad_norm": 2.630951166152954, "learning_rate": 0.00019692659942266118, "loss": 2.197, "step": 2981 }, { "epoch": 0.1067917703726252, "grad_norm": 1.6085656881332397, "learning_rate": 0.00019692374523825823, "loss": 1.4823, "step": 2982 }, { "epoch": 0.1068275825021935, "grad_norm": 1.7773700952529907, "learning_rate": 0.00019692088974987133, "loss": 1.8165, "step": 2983 }, { "epoch": 0.10686339463176178, "grad_norm": 1.6625611782073975, "learning_rate": 0.0001969180329575389, "loss": 1.6183, "step": 2984 }, { "epoch": 0.10689920676133007, "grad_norm": 3.302849292755127, "learning_rate": 0.0001969151748612993, "loss": 1.8682, "step": 2985 }, { "epoch": 0.10693501889089835, "grad_norm": 1.2780555486679077, "learning_rate": 0.00019691231546119107, "loss": 1.3038, "step": 2986 }, { "epoch": 0.10697083102046663, "grad_norm": 1.9470473527908325, "learning_rate": 0.00019690945475725266, "loss": 1.4459, "step": 2987 }, { "epoch": 0.10700664315003491, "grad_norm": 1.5665614604949951, "learning_rate": 0.0001969065927495225, "loss": 1.6486, "step": 2988 }, { "epoch": 0.1070424552796032, "grad_norm": 1.4052174091339111, "learning_rate": 0.00019690372943803914, "loss": 1.7583, "step": 2989 }, { "epoch": 0.1070782674091715, "grad_norm": 2.11560320854187, "learning_rate": 0.00019690086482284112, "loss": 1.4927, "step": 2990 }, { "epoch": 0.10711407953873978, "grad_norm": 1.2599753141403198, "learning_rate": 0.00019689799890396694, "loss": 1.5102, "step": 2991 }, { "epoch": 0.10714989166830806, "grad_norm": 2.6709375381469727, "learning_rate": 0.0001968951316814552, "loss": 1.7188, "step": 2992 }, { "epoch": 0.10718570379787634, "grad_norm": 2.060213565826416, "learning_rate": 0.0001968922631553444, "loss": 1.5915, "step": 2993 }, { "epoch": 0.10722151592744462, "grad_norm": 1.579801082611084, "learning_rate": 0.00019688939332567325, "loss": 1.5262, "step": 2994 }, { "epoch": 0.10725732805701291, "grad_norm": 1.5131771564483643, "learning_rate": 0.00019688652219248021, "loss": 1.2072, "step": 2995 }, { "epoch": 0.10729314018658119, "grad_norm": 1.1854443550109863, "learning_rate": 0.00019688364975580406, "loss": 1.556, "step": 2996 }, { "epoch": 0.10732895231614947, "grad_norm": 1.2820968627929688, "learning_rate": 0.00019688077601568332, "loss": 1.4393, "step": 2997 }, { "epoch": 0.10736476444571777, "grad_norm": 1.5455936193466187, "learning_rate": 0.00019687790097215675, "loss": 1.6942, "step": 2998 }, { "epoch": 0.10740057657528605, "grad_norm": 1.7165623903274536, "learning_rate": 0.00019687502462526296, "loss": 1.8651, "step": 2999 }, { "epoch": 0.10743638870485434, "grad_norm": 1.641993522644043, "learning_rate": 0.00019687214697504068, "loss": 1.7097, "step": 3000 }, { "epoch": 0.10747220083442262, "grad_norm": 1.5092154741287231, "learning_rate": 0.00019686926802152862, "loss": 1.6165, "step": 3001 }, { "epoch": 0.1075080129639909, "grad_norm": 1.4830560684204102, "learning_rate": 0.0001968663877647655, "loss": 1.6553, "step": 3002 }, { "epoch": 0.10754382509355918, "grad_norm": 1.6284724473953247, "learning_rate": 0.0001968635062047901, "loss": 1.7557, "step": 3003 }, { "epoch": 0.10757963722312747, "grad_norm": 1.5482624769210815, "learning_rate": 0.00019686062334164114, "loss": 1.531, "step": 3004 }, { "epoch": 0.10761544935269576, "grad_norm": 1.963331937789917, "learning_rate": 0.00019685773917535747, "loss": 1.5902, "step": 3005 }, { "epoch": 0.10765126148226405, "grad_norm": 1.459810495376587, "learning_rate": 0.00019685485370597781, "loss": 1.6869, "step": 3006 }, { "epoch": 0.10768707361183233, "grad_norm": 1.9397635459899902, "learning_rate": 0.00019685196693354108, "loss": 1.9816, "step": 3007 }, { "epoch": 0.10772288574140061, "grad_norm": 1.316998839378357, "learning_rate": 0.00019684907885808602, "loss": 1.4956, "step": 3008 }, { "epoch": 0.1077586978709689, "grad_norm": 1.8278248310089111, "learning_rate": 0.00019684618947965157, "loss": 1.7439, "step": 3009 }, { "epoch": 0.10779451000053718, "grad_norm": 1.5549050569534302, "learning_rate": 0.00019684329879827655, "loss": 1.7759, "step": 3010 }, { "epoch": 0.10783032213010546, "grad_norm": 1.860183596611023, "learning_rate": 0.00019684040681399988, "loss": 1.7717, "step": 3011 }, { "epoch": 0.10786613425967376, "grad_norm": 1.4960808753967285, "learning_rate": 0.0001968375135268604, "loss": 1.8338, "step": 3012 }, { "epoch": 0.10790194638924204, "grad_norm": 2.194607734680176, "learning_rate": 0.00019683461893689713, "loss": 1.3071, "step": 3013 }, { "epoch": 0.10793775851881032, "grad_norm": 1.5040713548660278, "learning_rate": 0.00019683172304414895, "loss": 1.9321, "step": 3014 }, { "epoch": 0.1079735706483786, "grad_norm": 1.7146315574645996, "learning_rate": 0.00019682882584865486, "loss": 1.6084, "step": 3015 }, { "epoch": 0.10800938277794689, "grad_norm": 1.6277421712875366, "learning_rate": 0.0001968259273504538, "loss": 1.296, "step": 3016 }, { "epoch": 0.10804519490751517, "grad_norm": 3.6533689498901367, "learning_rate": 0.0001968230275495848, "loss": 1.5586, "step": 3017 }, { "epoch": 0.10808100703708345, "grad_norm": 1.6158928871154785, "learning_rate": 0.00019682012644608684, "loss": 1.9503, "step": 3018 }, { "epoch": 0.10811681916665175, "grad_norm": 1.6434288024902344, "learning_rate": 0.000196817224039999, "loss": 1.5552, "step": 3019 }, { "epoch": 0.10815263129622003, "grad_norm": 1.395654320716858, "learning_rate": 0.00019681432033136025, "loss": 1.6248, "step": 3020 }, { "epoch": 0.10818844342578832, "grad_norm": 1.5744826793670654, "learning_rate": 0.00019681141532020973, "loss": 1.4838, "step": 3021 }, { "epoch": 0.1082242555553566, "grad_norm": 1.7468857765197754, "learning_rate": 0.00019680850900658648, "loss": 1.8954, "step": 3022 }, { "epoch": 0.10826006768492488, "grad_norm": 2.2361481189727783, "learning_rate": 0.00019680560139052962, "loss": 1.5789, "step": 3023 }, { "epoch": 0.10829587981449317, "grad_norm": 1.6525112390518188, "learning_rate": 0.00019680269247207826, "loss": 1.705, "step": 3024 }, { "epoch": 0.10833169194406145, "grad_norm": 1.5771716833114624, "learning_rate": 0.00019679978225127154, "loss": 1.5043, "step": 3025 }, { "epoch": 0.10836750407362974, "grad_norm": 1.917914867401123, "learning_rate": 0.00019679687072814863, "loss": 1.8099, "step": 3026 }, { "epoch": 0.10840331620319803, "grad_norm": 1.7994537353515625, "learning_rate": 0.00019679395790274867, "loss": 1.8708, "step": 3027 }, { "epoch": 0.10843912833276631, "grad_norm": 1.5328046083450317, "learning_rate": 0.00019679104377511085, "loss": 1.6289, "step": 3028 }, { "epoch": 0.1084749404623346, "grad_norm": 1.5930429697036743, "learning_rate": 0.0001967881283452744, "loss": 1.6851, "step": 3029 }, { "epoch": 0.10851075259190288, "grad_norm": 1.4349143505096436, "learning_rate": 0.00019678521161327854, "loss": 1.7717, "step": 3030 }, { "epoch": 0.10854656472147116, "grad_norm": 1.4315009117126465, "learning_rate": 0.0001967822935791625, "loss": 1.8928, "step": 3031 }, { "epoch": 0.10858237685103944, "grad_norm": 1.4336453676223755, "learning_rate": 0.0001967793742429655, "loss": 1.9097, "step": 3032 }, { "epoch": 0.10861818898060774, "grad_norm": 1.6382025480270386, "learning_rate": 0.00019677645360472693, "loss": 1.6168, "step": 3033 }, { "epoch": 0.10865400111017602, "grad_norm": 2.577434778213501, "learning_rate": 0.00019677353166448595, "loss": 1.7761, "step": 3034 }, { "epoch": 0.1086898132397443, "grad_norm": 1.9029229879379272, "learning_rate": 0.00019677060842228193, "loss": 1.594, "step": 3035 }, { "epoch": 0.10872562536931259, "grad_norm": 1.9777616262435913, "learning_rate": 0.00019676768387815423, "loss": 1.8283, "step": 3036 }, { "epoch": 0.10876143749888087, "grad_norm": 1.5141868591308594, "learning_rate": 0.00019676475803214217, "loss": 1.3587, "step": 3037 }, { "epoch": 0.10879724962844915, "grad_norm": 1.609864354133606, "learning_rate": 0.0001967618308842851, "loss": 1.7622, "step": 3038 }, { "epoch": 0.10883306175801744, "grad_norm": 2.1204848289489746, "learning_rate": 0.00019675890243462237, "loss": 1.6448, "step": 3039 }, { "epoch": 0.10886887388758573, "grad_norm": 1.144871473312378, "learning_rate": 0.00019675597268319344, "loss": 1.6545, "step": 3040 }, { "epoch": 0.10890468601715401, "grad_norm": 1.7205898761749268, "learning_rate": 0.00019675304163003772, "loss": 1.7451, "step": 3041 }, { "epoch": 0.1089404981467223, "grad_norm": 1.9791665077209473, "learning_rate": 0.00019675010927519462, "loss": 1.871, "step": 3042 }, { "epoch": 0.10897631027629058, "grad_norm": 1.382914423942566, "learning_rate": 0.0001967471756187036, "loss": 1.6498, "step": 3043 }, { "epoch": 0.10901212240585886, "grad_norm": 2.2286922931671143, "learning_rate": 0.0001967442406606041, "loss": 1.5953, "step": 3044 }, { "epoch": 0.10904793453542715, "grad_norm": 1.5717313289642334, "learning_rate": 0.00019674130440093567, "loss": 1.7754, "step": 3045 }, { "epoch": 0.10908374666499543, "grad_norm": 1.2834662199020386, "learning_rate": 0.00019673836683973777, "loss": 1.546, "step": 3046 }, { "epoch": 0.10911955879456371, "grad_norm": 2.3408381938934326, "learning_rate": 0.00019673542797704992, "loss": 1.6439, "step": 3047 }, { "epoch": 0.10915537092413201, "grad_norm": 2.09751558303833, "learning_rate": 0.00019673248781291167, "loss": 1.7954, "step": 3048 }, { "epoch": 0.10919118305370029, "grad_norm": 1.5235075950622559, "learning_rate": 0.00019672954634736257, "loss": 2.0121, "step": 3049 }, { "epoch": 0.10922699518326857, "grad_norm": 2.3699159622192383, "learning_rate": 0.00019672660358044218, "loss": 1.5584, "step": 3050 }, { "epoch": 0.10926280731283686, "grad_norm": 1.5464084148406982, "learning_rate": 0.00019672365951219013, "loss": 1.2083, "step": 3051 }, { "epoch": 0.10929861944240514, "grad_norm": 1.4740662574768066, "learning_rate": 0.00019672071414264598, "loss": 1.6632, "step": 3052 }, { "epoch": 0.10933443157197342, "grad_norm": 1.5872660875320435, "learning_rate": 0.0001967177674718494, "loss": 1.7066, "step": 3053 }, { "epoch": 0.1093702437015417, "grad_norm": 1.4718883037567139, "learning_rate": 0.00019671481949984002, "loss": 1.7492, "step": 3054 }, { "epoch": 0.10940605583111, "grad_norm": 1.7044392824172974, "learning_rate": 0.0001967118702266575, "loss": 1.3809, "step": 3055 }, { "epoch": 0.10944186796067829, "grad_norm": 1.6310226917266846, "learning_rate": 0.0001967089196523415, "loss": 1.8413, "step": 3056 }, { "epoch": 0.10947768009024657, "grad_norm": 1.6687250137329102, "learning_rate": 0.00019670596777693176, "loss": 1.4604, "step": 3057 }, { "epoch": 0.10951349221981485, "grad_norm": 1.9419200420379639, "learning_rate": 0.00019670301460046795, "loss": 1.7134, "step": 3058 }, { "epoch": 0.10954930434938313, "grad_norm": 1.624732255935669, "learning_rate": 0.0001967000601229898, "loss": 1.6658, "step": 3059 }, { "epoch": 0.10958511647895142, "grad_norm": 2.9077885150909424, "learning_rate": 0.00019669710434453707, "loss": 1.8957, "step": 3060 }, { "epoch": 0.1096209286085197, "grad_norm": 1.5916715860366821, "learning_rate": 0.00019669414726514956, "loss": 1.8633, "step": 3061 }, { "epoch": 0.109656740738088, "grad_norm": 1.584800124168396, "learning_rate": 0.000196691188884867, "loss": 1.3014, "step": 3062 }, { "epoch": 0.10969255286765628, "grad_norm": 1.7780625820159912, "learning_rate": 0.00019668822920372922, "loss": 1.7299, "step": 3063 }, { "epoch": 0.10972836499722456, "grad_norm": 1.7964451313018799, "learning_rate": 0.00019668526822177605, "loss": 1.5913, "step": 3064 }, { "epoch": 0.10976417712679284, "grad_norm": 1.9102052450180054, "learning_rate": 0.00019668230593904734, "loss": 1.3066, "step": 3065 }, { "epoch": 0.10979998925636113, "grad_norm": 1.4517691135406494, "learning_rate": 0.00019667934235558285, "loss": 1.4668, "step": 3066 }, { "epoch": 0.10983580138592941, "grad_norm": 1.3721327781677246, "learning_rate": 0.00019667637747142257, "loss": 1.8355, "step": 3067 }, { "epoch": 0.10987161351549769, "grad_norm": 2.1370456218719482, "learning_rate": 0.0001966734112866063, "loss": 1.386, "step": 3068 }, { "epoch": 0.10990742564506599, "grad_norm": 1.892778992652893, "learning_rate": 0.00019667044380117398, "loss": 1.4856, "step": 3069 }, { "epoch": 0.10994323777463427, "grad_norm": 1.4679338932037354, "learning_rate": 0.00019666747501516553, "loss": 1.7188, "step": 3070 }, { "epoch": 0.10997904990420256, "grad_norm": 1.373197317123413, "learning_rate": 0.00019666450492862093, "loss": 1.3631, "step": 3071 }, { "epoch": 0.11001486203377084, "grad_norm": 1.1402403116226196, "learning_rate": 0.0001966615335415801, "loss": 1.5227, "step": 3072 }, { "epoch": 0.11005067416333912, "grad_norm": 1.6488516330718994, "learning_rate": 0.000196658560854083, "loss": 1.7497, "step": 3073 }, { "epoch": 0.1100864862929074, "grad_norm": 1.3037346601486206, "learning_rate": 0.00019665558686616965, "loss": 1.8412, "step": 3074 }, { "epoch": 0.11012229842247569, "grad_norm": 1.519590139389038, "learning_rate": 0.00019665261157788004, "loss": 1.7108, "step": 3075 }, { "epoch": 0.11015811055204398, "grad_norm": 1.3518164157867432, "learning_rate": 0.00019664963498925423, "loss": 1.6536, "step": 3076 }, { "epoch": 0.11019392268161227, "grad_norm": 3.0712289810180664, "learning_rate": 0.00019664665710033226, "loss": 1.6875, "step": 3077 }, { "epoch": 0.11022973481118055, "grad_norm": 1.832874059677124, "learning_rate": 0.0001966436779111542, "loss": 1.8305, "step": 3078 }, { "epoch": 0.11026554694074883, "grad_norm": 1.384520411491394, "learning_rate": 0.00019664069742176006, "loss": 1.6199, "step": 3079 }, { "epoch": 0.11030135907031711, "grad_norm": 1.634929895401001, "learning_rate": 0.00019663771563219006, "loss": 1.6368, "step": 3080 }, { "epoch": 0.1103371711998854, "grad_norm": 1.5089815855026245, "learning_rate": 0.00019663473254248417, "loss": 1.8269, "step": 3081 }, { "epoch": 0.11037298332945368, "grad_norm": 1.8302271366119385, "learning_rate": 0.00019663174815268266, "loss": 1.7455, "step": 3082 }, { "epoch": 0.11040879545902198, "grad_norm": 1.9949239492416382, "learning_rate": 0.0001966287624628256, "loss": 1.8868, "step": 3083 }, { "epoch": 0.11044460758859026, "grad_norm": 1.749061107635498, "learning_rate": 0.0001966257754729532, "loss": 1.9831, "step": 3084 }, { "epoch": 0.11048041971815854, "grad_norm": 1.5313340425491333, "learning_rate": 0.00019662278718310562, "loss": 1.6813, "step": 3085 }, { "epoch": 0.11051623184772683, "grad_norm": 2.523949146270752, "learning_rate": 0.0001966197975933231, "loss": 1.4592, "step": 3086 }, { "epoch": 0.11055204397729511, "grad_norm": 1.696785807609558, "learning_rate": 0.0001966168067036458, "loss": 1.4651, "step": 3087 }, { "epoch": 0.11058785610686339, "grad_norm": 2.151517391204834, "learning_rate": 0.000196613814514114, "loss": 1.7816, "step": 3088 }, { "epoch": 0.11062366823643167, "grad_norm": 1.1579859256744385, "learning_rate": 0.00019661082102476795, "loss": 1.4892, "step": 3089 }, { "epoch": 0.11065948036599997, "grad_norm": 1.6856324672698975, "learning_rate": 0.00019660782623564792, "loss": 1.6944, "step": 3090 }, { "epoch": 0.11069529249556825, "grad_norm": 1.9886382818222046, "learning_rate": 0.0001966048301467942, "loss": 1.5386, "step": 3091 }, { "epoch": 0.11073110462513654, "grad_norm": 1.2798269987106323, "learning_rate": 0.0001966018327582471, "loss": 1.5971, "step": 3092 }, { "epoch": 0.11076691675470482, "grad_norm": 1.2525361776351929, "learning_rate": 0.00019659883407004697, "loss": 1.6204, "step": 3093 }, { "epoch": 0.1108027288842731, "grad_norm": 1.73500657081604, "learning_rate": 0.00019659583408223412, "loss": 1.7557, "step": 3094 }, { "epoch": 0.11083854101384139, "grad_norm": 1.7497897148132324, "learning_rate": 0.00019659283279484891, "loss": 1.7348, "step": 3095 }, { "epoch": 0.11087435314340967, "grad_norm": 1.3274827003479004, "learning_rate": 0.00019658983020793175, "loss": 1.4419, "step": 3096 }, { "epoch": 0.11091016527297795, "grad_norm": 2.2989590167999268, "learning_rate": 0.000196586826321523, "loss": 1.9745, "step": 3097 }, { "epoch": 0.11094597740254625, "grad_norm": 2.2532341480255127, "learning_rate": 0.0001965838211356631, "loss": 1.8956, "step": 3098 }, { "epoch": 0.11098178953211453, "grad_norm": 2.0518624782562256, "learning_rate": 0.00019658081465039246, "loss": 1.681, "step": 3099 }, { "epoch": 0.11101760166168281, "grad_norm": 1.6704341173171997, "learning_rate": 0.00019657780686575157, "loss": 1.7184, "step": 3100 }, { "epoch": 0.1110534137912511, "grad_norm": 1.0749001502990723, "learning_rate": 0.00019657479778178083, "loss": 1.6137, "step": 3101 }, { "epoch": 0.11108922592081938, "grad_norm": 1.7141863107681274, "learning_rate": 0.00019657178739852075, "loss": 1.484, "step": 3102 }, { "epoch": 0.11112503805038766, "grad_norm": 1.4108766317367554, "learning_rate": 0.00019656877571601187, "loss": 1.8166, "step": 3103 }, { "epoch": 0.11116085017995594, "grad_norm": 1.4703015089035034, "learning_rate": 0.00019656576273429467, "loss": 1.796, "step": 3104 }, { "epoch": 0.11119666230952424, "grad_norm": 1.2321605682373047, "learning_rate": 0.0001965627484534097, "loss": 1.6408, "step": 3105 }, { "epoch": 0.11123247443909252, "grad_norm": 1.6735345125198364, "learning_rate": 0.0001965597328733975, "loss": 1.6995, "step": 3106 }, { "epoch": 0.1112682865686608, "grad_norm": 1.1838451623916626, "learning_rate": 0.00019655671599429865, "loss": 1.663, "step": 3107 }, { "epoch": 0.11130409869822909, "grad_norm": 1.8111246824264526, "learning_rate": 0.0001965536978161537, "loss": 1.7964, "step": 3108 }, { "epoch": 0.11133991082779737, "grad_norm": 2.363241195678711, "learning_rate": 0.00019655067833900333, "loss": 1.5979, "step": 3109 }, { "epoch": 0.11137572295736566, "grad_norm": 1.3892968893051147, "learning_rate": 0.00019654765756288813, "loss": 1.9266, "step": 3110 }, { "epoch": 0.11141153508693394, "grad_norm": 2.5642900466918945, "learning_rate": 0.00019654463548784873, "loss": 1.6784, "step": 3111 }, { "epoch": 0.11144734721650223, "grad_norm": 1.9628474712371826, "learning_rate": 0.00019654161211392576, "loss": 1.5582, "step": 3112 }, { "epoch": 0.11148315934607052, "grad_norm": 1.6404321193695068, "learning_rate": 0.00019653858744115996, "loss": 1.4472, "step": 3113 }, { "epoch": 0.1115189714756388, "grad_norm": 1.7177790403366089, "learning_rate": 0.00019653556146959197, "loss": 1.4041, "step": 3114 }, { "epoch": 0.11155478360520708, "grad_norm": 1.340905785560608, "learning_rate": 0.00019653253419926254, "loss": 1.7599, "step": 3115 }, { "epoch": 0.11159059573477537, "grad_norm": 1.997868537902832, "learning_rate": 0.00019652950563021237, "loss": 1.6725, "step": 3116 }, { "epoch": 0.11162640786434365, "grad_norm": 1.5993092060089111, "learning_rate": 0.00019652647576248223, "loss": 1.6371, "step": 3117 }, { "epoch": 0.11166221999391193, "grad_norm": 1.9976245164871216, "learning_rate": 0.00019652344459611287, "loss": 1.8941, "step": 3118 }, { "epoch": 0.11169803212348023, "grad_norm": 1.9197741746902466, "learning_rate": 0.00019652041213114504, "loss": 1.9326, "step": 3119 }, { "epoch": 0.11173384425304851, "grad_norm": 1.5483609437942505, "learning_rate": 0.0001965173783676196, "loss": 1.3853, "step": 3120 }, { "epoch": 0.1117696563826168, "grad_norm": 1.5547926425933838, "learning_rate": 0.0001965143433055773, "loss": 1.769, "step": 3121 }, { "epoch": 0.11180546851218508, "grad_norm": 1.8922780752182007, "learning_rate": 0.00019651130694505904, "loss": 1.7482, "step": 3122 }, { "epoch": 0.11184128064175336, "grad_norm": 1.4488351345062256, "learning_rate": 0.00019650826928610564, "loss": 1.8088, "step": 3123 }, { "epoch": 0.11187709277132164, "grad_norm": 1.4341516494750977, "learning_rate": 0.00019650523032875791, "loss": 1.361, "step": 3124 }, { "epoch": 0.11191290490088993, "grad_norm": 1.3100446462631226, "learning_rate": 0.00019650219007305686, "loss": 1.53, "step": 3125 }, { "epoch": 0.11194871703045822, "grad_norm": 1.9011170864105225, "learning_rate": 0.00019649914851904327, "loss": 1.5894, "step": 3126 }, { "epoch": 0.1119845291600265, "grad_norm": 1.6128778457641602, "learning_rate": 0.0001964961056667581, "loss": 1.7066, "step": 3127 }, { "epoch": 0.11202034128959479, "grad_norm": 1.8269716501235962, "learning_rate": 0.00019649306151624235, "loss": 1.4852, "step": 3128 }, { "epoch": 0.11205615341916307, "grad_norm": 1.937235713005066, "learning_rate": 0.0001964900160675369, "loss": 1.5758, "step": 3129 }, { "epoch": 0.11209196554873135, "grad_norm": 1.8874437808990479, "learning_rate": 0.00019648696932068272, "loss": 1.98, "step": 3130 }, { "epoch": 0.11212777767829964, "grad_norm": 1.6971936225891113, "learning_rate": 0.0001964839212757209, "loss": 1.5301, "step": 3131 }, { "epoch": 0.11216358980786792, "grad_norm": 1.5790627002716064, "learning_rate": 0.00019648087193269232, "loss": 1.6124, "step": 3132 }, { "epoch": 0.11219940193743622, "grad_norm": 1.3589112758636475, "learning_rate": 0.00019647782129163805, "loss": 1.6747, "step": 3133 }, { "epoch": 0.1122352140670045, "grad_norm": 1.376923680305481, "learning_rate": 0.00019647476935259916, "loss": 1.5895, "step": 3134 }, { "epoch": 0.11227102619657278, "grad_norm": 2.1595699787139893, "learning_rate": 0.0001964717161156167, "loss": 1.5737, "step": 3135 }, { "epoch": 0.11230683832614106, "grad_norm": 1.3219338655471802, "learning_rate": 0.00019646866158073173, "loss": 1.5445, "step": 3136 }, { "epoch": 0.11234265045570935, "grad_norm": 1.44645357131958, "learning_rate": 0.00019646560574798535, "loss": 1.2135, "step": 3137 }, { "epoch": 0.11237846258527763, "grad_norm": 1.1569671630859375, "learning_rate": 0.0001964625486174187, "loss": 1.3581, "step": 3138 }, { "epoch": 0.11241427471484591, "grad_norm": 1.5105891227722168, "learning_rate": 0.00019645949018907283, "loss": 1.5849, "step": 3139 }, { "epoch": 0.11245008684441421, "grad_norm": 1.767600655555725, "learning_rate": 0.000196456430462989, "loss": 1.3581, "step": 3140 }, { "epoch": 0.11248589897398249, "grad_norm": 1.3685157299041748, "learning_rate": 0.00019645336943920828, "loss": 1.7175, "step": 3141 }, { "epoch": 0.11252171110355078, "grad_norm": 1.4004133939743042, "learning_rate": 0.00019645030711777192, "loss": 1.2914, "step": 3142 }, { "epoch": 0.11255752323311906, "grad_norm": 1.5391340255737305, "learning_rate": 0.0001964472434987211, "loss": 1.7839, "step": 3143 }, { "epoch": 0.11259333536268734, "grad_norm": 1.2137620449066162, "learning_rate": 0.00019644417858209702, "loss": 1.5005, "step": 3144 }, { "epoch": 0.11262914749225562, "grad_norm": 1.874804139137268, "learning_rate": 0.00019644111236794088, "loss": 1.8163, "step": 3145 }, { "epoch": 0.1126649596218239, "grad_norm": 1.817853569984436, "learning_rate": 0.000196438044856294, "loss": 1.6768, "step": 3146 }, { "epoch": 0.11270077175139219, "grad_norm": 1.3849821090698242, "learning_rate": 0.0001964349760471976, "loss": 1.5519, "step": 3147 }, { "epoch": 0.11273658388096049, "grad_norm": 1.526769995689392, "learning_rate": 0.00019643190594069302, "loss": 1.939, "step": 3148 }, { "epoch": 0.11277239601052877, "grad_norm": 2.315960645675659, "learning_rate": 0.00019642883453682152, "loss": 1.8201, "step": 3149 }, { "epoch": 0.11280820814009705, "grad_norm": 1.7398210763931274, "learning_rate": 0.00019642576183562444, "loss": 1.4776, "step": 3150 }, { "epoch": 0.11284402026966533, "grad_norm": 1.768480658531189, "learning_rate": 0.00019642268783714312, "loss": 1.9995, "step": 3151 }, { "epoch": 0.11287983239923362, "grad_norm": 1.9272271394729614, "learning_rate": 0.0001964196125414189, "loss": 1.6427, "step": 3152 }, { "epoch": 0.1129156445288019, "grad_norm": 1.451150894165039, "learning_rate": 0.0001964165359484932, "loss": 1.6079, "step": 3153 }, { "epoch": 0.11295145665837018, "grad_norm": 1.640161395072937, "learning_rate": 0.00019641345805840733, "loss": 1.5618, "step": 3154 }, { "epoch": 0.11298726878793848, "grad_norm": 1.5714844465255737, "learning_rate": 0.00019641037887120277, "loss": 1.7151, "step": 3155 }, { "epoch": 0.11302308091750676, "grad_norm": 1.5246632099151611, "learning_rate": 0.00019640729838692092, "loss": 1.425, "step": 3156 }, { "epoch": 0.11305889304707505, "grad_norm": 1.6431224346160889, "learning_rate": 0.00019640421660560323, "loss": 1.6421, "step": 3157 }, { "epoch": 0.11309470517664333, "grad_norm": 1.2764418125152588, "learning_rate": 0.00019640113352729116, "loss": 1.5922, "step": 3158 }, { "epoch": 0.11313051730621161, "grad_norm": 1.4131271839141846, "learning_rate": 0.00019639804915202617, "loss": 1.5558, "step": 3159 }, { "epoch": 0.1131663294357799, "grad_norm": 1.229662537574768, "learning_rate": 0.0001963949634798498, "loss": 1.5588, "step": 3160 }, { "epoch": 0.11320214156534818, "grad_norm": 1.4876227378845215, "learning_rate": 0.0001963918765108035, "loss": 1.4488, "step": 3161 }, { "epoch": 0.11323795369491647, "grad_norm": 1.2596564292907715, "learning_rate": 0.00019638878824492886, "loss": 1.4179, "step": 3162 }, { "epoch": 0.11327376582448476, "grad_norm": 1.4421182870864868, "learning_rate": 0.0001963856986822674, "loss": 1.3701, "step": 3163 }, { "epoch": 0.11330957795405304, "grad_norm": 1.4923369884490967, "learning_rate": 0.00019638260782286072, "loss": 1.3928, "step": 3164 }, { "epoch": 0.11334539008362132, "grad_norm": 1.701507806777954, "learning_rate": 0.00019637951566675035, "loss": 1.8446, "step": 3165 }, { "epoch": 0.1133812022131896, "grad_norm": 1.7031058073043823, "learning_rate": 0.00019637642221397792, "loss": 1.6509, "step": 3166 }, { "epoch": 0.11341701434275789, "grad_norm": 1.7792160511016846, "learning_rate": 0.00019637332746458506, "loss": 1.7611, "step": 3167 }, { "epoch": 0.11345282647232617, "grad_norm": 2.105922222137451, "learning_rate": 0.00019637023141861338, "loss": 1.716, "step": 3168 }, { "epoch": 0.11348863860189447, "grad_norm": 2.09403920173645, "learning_rate": 0.00019636713407610455, "loss": 1.5287, "step": 3169 }, { "epoch": 0.11352445073146275, "grad_norm": 1.582002878189087, "learning_rate": 0.0001963640354371002, "loss": 2.0743, "step": 3170 }, { "epoch": 0.11356026286103103, "grad_norm": 1.5988606214523315, "learning_rate": 0.00019636093550164208, "loss": 1.6238, "step": 3171 }, { "epoch": 0.11359607499059932, "grad_norm": 1.5815061330795288, "learning_rate": 0.00019635783426977187, "loss": 2.0104, "step": 3172 }, { "epoch": 0.1136318871201676, "grad_norm": 1.422127366065979, "learning_rate": 0.00019635473174153128, "loss": 1.7212, "step": 3173 }, { "epoch": 0.11366769924973588, "grad_norm": 1.4015790224075317, "learning_rate": 0.00019635162791696212, "loss": 1.672, "step": 3174 }, { "epoch": 0.11370351137930416, "grad_norm": 1.4458675384521484, "learning_rate": 0.00019634852279610602, "loss": 1.6764, "step": 3175 }, { "epoch": 0.11373932350887246, "grad_norm": 2.8560314178466797, "learning_rate": 0.00019634541637900487, "loss": 1.6277, "step": 3176 }, { "epoch": 0.11377513563844074, "grad_norm": 1.758179783821106, "learning_rate": 0.0001963423086657004, "loss": 1.4349, "step": 3177 }, { "epoch": 0.11381094776800903, "grad_norm": 1.4987274408340454, "learning_rate": 0.00019633919965623444, "loss": 1.7514, "step": 3178 }, { "epoch": 0.11384675989757731, "grad_norm": 1.8860074281692505, "learning_rate": 0.0001963360893506488, "loss": 1.4095, "step": 3179 }, { "epoch": 0.11388257202714559, "grad_norm": 1.5295324325561523, "learning_rate": 0.0001963329777489854, "loss": 1.479, "step": 3180 }, { "epoch": 0.11391838415671388, "grad_norm": 1.5854120254516602, "learning_rate": 0.00019632986485128602, "loss": 1.6353, "step": 3181 }, { "epoch": 0.11395419628628216, "grad_norm": 1.5801342725753784, "learning_rate": 0.00019632675065759254, "loss": 1.4634, "step": 3182 }, { "epoch": 0.11399000841585046, "grad_norm": 1.8172426223754883, "learning_rate": 0.0001963236351679469, "loss": 1.7716, "step": 3183 }, { "epoch": 0.11402582054541874, "grad_norm": 1.4854059219360352, "learning_rate": 0.00019632051838239099, "loss": 1.6442, "step": 3184 }, { "epoch": 0.11406163267498702, "grad_norm": 1.682544231414795, "learning_rate": 0.00019631740030096677, "loss": 1.9143, "step": 3185 }, { "epoch": 0.1140974448045553, "grad_norm": 1.281859040260315, "learning_rate": 0.00019631428092371612, "loss": 1.5915, "step": 3186 }, { "epoch": 0.11413325693412359, "grad_norm": 1.3597787618637085, "learning_rate": 0.00019631116025068112, "loss": 1.6747, "step": 3187 }, { "epoch": 0.11416906906369187, "grad_norm": 1.622419834136963, "learning_rate": 0.00019630803828190368, "loss": 1.848, "step": 3188 }, { "epoch": 0.11420488119326015, "grad_norm": 1.744403600692749, "learning_rate": 0.00019630491501742577, "loss": 1.5405, "step": 3189 }, { "epoch": 0.11424069332282845, "grad_norm": 1.3016765117645264, "learning_rate": 0.00019630179045728946, "loss": 1.5965, "step": 3190 }, { "epoch": 0.11427650545239673, "grad_norm": 1.3310681581497192, "learning_rate": 0.00019629866460153683, "loss": 1.5548, "step": 3191 }, { "epoch": 0.11431231758196501, "grad_norm": 2.2537686824798584, "learning_rate": 0.00019629553745020983, "loss": 1.2023, "step": 3192 }, { "epoch": 0.1143481297115333, "grad_norm": 1.3856066465377808, "learning_rate": 0.00019629240900335062, "loss": 1.5476, "step": 3193 }, { "epoch": 0.11438394184110158, "grad_norm": 1.8118864297866821, "learning_rate": 0.00019628927926100125, "loss": 1.4654, "step": 3194 }, { "epoch": 0.11441975397066986, "grad_norm": 1.674796223640442, "learning_rate": 0.0001962861482232038, "loss": 1.549, "step": 3195 }, { "epoch": 0.11445556610023815, "grad_norm": 1.317910075187683, "learning_rate": 0.00019628301589000047, "loss": 1.3573, "step": 3196 }, { "epoch": 0.11449137822980643, "grad_norm": 2.1432268619537354, "learning_rate": 0.00019627988226143334, "loss": 1.477, "step": 3197 }, { "epoch": 0.11452719035937473, "grad_norm": 1.5118340253829956, "learning_rate": 0.00019627674733754458, "loss": 1.5706, "step": 3198 }, { "epoch": 0.11456300248894301, "grad_norm": 1.699642539024353, "learning_rate": 0.00019627361111837637, "loss": 1.4643, "step": 3199 }, { "epoch": 0.11459881461851129, "grad_norm": 1.292831301689148, "learning_rate": 0.00019627047360397092, "loss": 1.8203, "step": 3200 }, { "epoch": 0.11463462674807957, "grad_norm": 1.6774110794067383, "learning_rate": 0.00019626733479437042, "loss": 1.9212, "step": 3201 }, { "epoch": 0.11467043887764786, "grad_norm": 1.5225844383239746, "learning_rate": 0.0001962641946896171, "loss": 1.6182, "step": 3202 }, { "epoch": 0.11470625100721614, "grad_norm": 2.3992981910705566, "learning_rate": 0.0001962610532897532, "loss": 1.9434, "step": 3203 }, { "epoch": 0.11474206313678442, "grad_norm": 1.9705089330673218, "learning_rate": 0.00019625791059482106, "loss": 1.4532, "step": 3204 }, { "epoch": 0.11477787526635272, "grad_norm": 2.258025884628296, "learning_rate": 0.00019625476660486285, "loss": 1.3525, "step": 3205 }, { "epoch": 0.114813687395921, "grad_norm": 1.7987005710601807, "learning_rate": 0.0001962516213199209, "loss": 1.7462, "step": 3206 }, { "epoch": 0.11484949952548928, "grad_norm": 1.720633864402771, "learning_rate": 0.00019624847474003756, "loss": 1.7263, "step": 3207 }, { "epoch": 0.11488531165505757, "grad_norm": 1.8208423852920532, "learning_rate": 0.00019624532686525513, "loss": 1.5331, "step": 3208 }, { "epoch": 0.11492112378462585, "grad_norm": 1.2464954853057861, "learning_rate": 0.000196242177695616, "loss": 1.4239, "step": 3209 }, { "epoch": 0.11495693591419413, "grad_norm": 1.676194429397583, "learning_rate": 0.0001962390272311625, "loss": 1.8397, "step": 3210 }, { "epoch": 0.11499274804376242, "grad_norm": 1.4178789854049683, "learning_rate": 0.00019623587547193703, "loss": 1.525, "step": 3211 }, { "epoch": 0.11502856017333071, "grad_norm": 1.856972098350525, "learning_rate": 0.00019623272241798198, "loss": 1.9044, "step": 3212 }, { "epoch": 0.115064372302899, "grad_norm": 2.108940362930298, "learning_rate": 0.0001962295680693398, "loss": 1.7134, "step": 3213 }, { "epoch": 0.11510018443246728, "grad_norm": 1.6334648132324219, "learning_rate": 0.0001962264124260529, "loss": 1.5537, "step": 3214 }, { "epoch": 0.11513599656203556, "grad_norm": 1.7307848930358887, "learning_rate": 0.00019622325548816373, "loss": 1.7397, "step": 3215 }, { "epoch": 0.11517180869160384, "grad_norm": 1.7003225088119507, "learning_rate": 0.0001962200972557148, "loss": 1.7081, "step": 3216 }, { "epoch": 0.11520762082117213, "grad_norm": 1.5408225059509277, "learning_rate": 0.00019621693772874855, "loss": 1.3395, "step": 3217 }, { "epoch": 0.11524343295074041, "grad_norm": 1.5445457696914673, "learning_rate": 0.00019621377690730754, "loss": 1.5551, "step": 3218 }, { "epoch": 0.1152792450803087, "grad_norm": 2.64323353767395, "learning_rate": 0.00019621061479143425, "loss": 1.662, "step": 3219 }, { "epoch": 0.11531505720987699, "grad_norm": 1.4125784635543823, "learning_rate": 0.00019620745138117124, "loss": 1.498, "step": 3220 }, { "epoch": 0.11535086933944527, "grad_norm": 1.3482048511505127, "learning_rate": 0.00019620428667656108, "loss": 1.6449, "step": 3221 }, { "epoch": 0.11538668146901356, "grad_norm": 1.5159589052200317, "learning_rate": 0.00019620112067764636, "loss": 1.7357, "step": 3222 }, { "epoch": 0.11542249359858184, "grad_norm": 1.3427600860595703, "learning_rate": 0.0001961979533844696, "loss": 1.4208, "step": 3223 }, { "epoch": 0.11545830572815012, "grad_norm": 2.6894454956054688, "learning_rate": 0.0001961947847970735, "loss": 1.7024, "step": 3224 }, { "epoch": 0.1154941178577184, "grad_norm": 2.293968677520752, "learning_rate": 0.00019619161491550065, "loss": 1.2496, "step": 3225 }, { "epoch": 0.1155299299872867, "grad_norm": 2.2852323055267334, "learning_rate": 0.00019618844373979372, "loss": 1.6852, "step": 3226 }, { "epoch": 0.11556574211685498, "grad_norm": 1.5715569257736206, "learning_rate": 0.0001961852712699953, "loss": 1.4904, "step": 3227 }, { "epoch": 0.11560155424642327, "grad_norm": 1.5239616632461548, "learning_rate": 0.00019618209750614813, "loss": 1.4977, "step": 3228 }, { "epoch": 0.11563736637599155, "grad_norm": 1.4947649240493774, "learning_rate": 0.00019617892244829495, "loss": 1.5866, "step": 3229 }, { "epoch": 0.11567317850555983, "grad_norm": 1.623744010925293, "learning_rate": 0.0001961757460964784, "loss": 1.5114, "step": 3230 }, { "epoch": 0.11570899063512811, "grad_norm": 1.7694364786148071, "learning_rate": 0.00019617256845074125, "loss": 1.6273, "step": 3231 }, { "epoch": 0.1157448027646964, "grad_norm": 1.5195114612579346, "learning_rate": 0.00019616938951112623, "loss": 1.3845, "step": 3232 }, { "epoch": 0.1157806148942647, "grad_norm": 1.6811881065368652, "learning_rate": 0.00019616620927767614, "loss": 1.3784, "step": 3233 }, { "epoch": 0.11581642702383298, "grad_norm": 1.8384026288986206, "learning_rate": 0.00019616302775043377, "loss": 1.6615, "step": 3234 }, { "epoch": 0.11585223915340126, "grad_norm": 1.5395166873931885, "learning_rate": 0.00019615984492944187, "loss": 1.7524, "step": 3235 }, { "epoch": 0.11588805128296954, "grad_norm": 1.3467326164245605, "learning_rate": 0.00019615666081474332, "loss": 1.6619, "step": 3236 }, { "epoch": 0.11592386341253783, "grad_norm": 1.4491621255874634, "learning_rate": 0.00019615347540638092, "loss": 1.5844, "step": 3237 }, { "epoch": 0.11595967554210611, "grad_norm": 2.1446890830993652, "learning_rate": 0.00019615028870439752, "loss": 1.6285, "step": 3238 }, { "epoch": 0.11599548767167439, "grad_norm": 2.09091854095459, "learning_rate": 0.00019614710070883602, "loss": 1.5445, "step": 3239 }, { "epoch": 0.11603129980124269, "grad_norm": 1.3230056762695312, "learning_rate": 0.00019614391141973934, "loss": 1.5391, "step": 3240 }, { "epoch": 0.11606711193081097, "grad_norm": 1.5421323776245117, "learning_rate": 0.00019614072083715028, "loss": 1.4999, "step": 3241 }, { "epoch": 0.11610292406037925, "grad_norm": 1.7281304597854614, "learning_rate": 0.00019613752896111187, "loss": 1.5916, "step": 3242 }, { "epoch": 0.11613873618994754, "grad_norm": 2.072479724884033, "learning_rate": 0.00019613433579166706, "loss": 1.922, "step": 3243 }, { "epoch": 0.11617454831951582, "grad_norm": 1.6558066606521606, "learning_rate": 0.0001961311413288587, "loss": 1.6862, "step": 3244 }, { "epoch": 0.1162103604490841, "grad_norm": 2.008986711502075, "learning_rate": 0.00019612794557272983, "loss": 1.5542, "step": 3245 }, { "epoch": 0.11624617257865238, "grad_norm": 1.22840416431427, "learning_rate": 0.00019612474852332348, "loss": 1.4519, "step": 3246 }, { "epoch": 0.11628198470822067, "grad_norm": 1.6340930461883545, "learning_rate": 0.00019612155018068264, "loss": 1.5469, "step": 3247 }, { "epoch": 0.11631779683778896, "grad_norm": 2.318631887435913, "learning_rate": 0.00019611835054485032, "loss": 1.7494, "step": 3248 }, { "epoch": 0.11635360896735725, "grad_norm": 1.6667400598526, "learning_rate": 0.00019611514961586957, "loss": 1.3893, "step": 3249 }, { "epoch": 0.11638942109692553, "grad_norm": 1.4194461107254028, "learning_rate": 0.00019611194739378344, "loss": 1.8322, "step": 3250 }, { "epoch": 0.11642523322649381, "grad_norm": 1.7950221300125122, "learning_rate": 0.00019610874387863508, "loss": 1.4541, "step": 3251 }, { "epoch": 0.1164610453560621, "grad_norm": 1.682098627090454, "learning_rate": 0.00019610553907046748, "loss": 1.6563, "step": 3252 }, { "epoch": 0.11649685748563038, "grad_norm": 2.29400372505188, "learning_rate": 0.0001961023329693239, "loss": 1.7833, "step": 3253 }, { "epoch": 0.11653266961519866, "grad_norm": 1.656288981437683, "learning_rate": 0.00019609912557524734, "loss": 1.5191, "step": 3254 }, { "epoch": 0.11656848174476696, "grad_norm": 1.3235529661178589, "learning_rate": 0.000196095916888281, "loss": 1.539, "step": 3255 }, { "epoch": 0.11660429387433524, "grad_norm": 1.794012188911438, "learning_rate": 0.00019609270690846807, "loss": 1.7339, "step": 3256 }, { "epoch": 0.11664010600390352, "grad_norm": 1.4943112134933472, "learning_rate": 0.00019608949563585174, "loss": 1.5602, "step": 3257 }, { "epoch": 0.1166759181334718, "grad_norm": 2.628278970718384, "learning_rate": 0.00019608628307047517, "loss": 1.7145, "step": 3258 }, { "epoch": 0.11671173026304009, "grad_norm": 1.359542965888977, "learning_rate": 0.0001960830692123816, "loss": 1.3189, "step": 3259 }, { "epoch": 0.11674754239260837, "grad_norm": 1.5178779363632202, "learning_rate": 0.00019607985406161425, "loss": 1.527, "step": 3260 }, { "epoch": 0.11678335452217666, "grad_norm": 1.6285661458969116, "learning_rate": 0.00019607663761821644, "loss": 1.8477, "step": 3261 }, { "epoch": 0.11681916665174495, "grad_norm": 1.4759529829025269, "learning_rate": 0.0001960734198822314, "loss": 1.6696, "step": 3262 }, { "epoch": 0.11685497878131323, "grad_norm": 1.8590339422225952, "learning_rate": 0.0001960702008537024, "loss": 1.5988, "step": 3263 }, { "epoch": 0.11689079091088152, "grad_norm": 2.2385072708129883, "learning_rate": 0.00019606698053267277, "loss": 1.7862, "step": 3264 }, { "epoch": 0.1169266030404498, "grad_norm": 1.7401012182235718, "learning_rate": 0.00019606375891918583, "loss": 1.8007, "step": 3265 }, { "epoch": 0.11696241517001808, "grad_norm": 1.3024067878723145, "learning_rate": 0.00019606053601328496, "loss": 1.5707, "step": 3266 }, { "epoch": 0.11699822729958637, "grad_norm": 2.206632375717163, "learning_rate": 0.00019605731181501342, "loss": 1.4116, "step": 3267 }, { "epoch": 0.11703403942915465, "grad_norm": 1.3507251739501953, "learning_rate": 0.00019605408632441474, "loss": 1.6662, "step": 3268 }, { "epoch": 0.11706985155872295, "grad_norm": 1.4132224321365356, "learning_rate": 0.00019605085954153218, "loss": 1.7236, "step": 3269 }, { "epoch": 0.11710566368829123, "grad_norm": 1.848435878753662, "learning_rate": 0.00019604763146640922, "loss": 1.774, "step": 3270 }, { "epoch": 0.11714147581785951, "grad_norm": 1.356055736541748, "learning_rate": 0.00019604440209908925, "loss": 1.9218, "step": 3271 }, { "epoch": 0.1171772879474278, "grad_norm": 1.9709981679916382, "learning_rate": 0.00019604117143961575, "loss": 1.5058, "step": 3272 }, { "epoch": 0.11721310007699608, "grad_norm": 1.8755549192428589, "learning_rate": 0.00019603793948803216, "loss": 1.4719, "step": 3273 }, { "epoch": 0.11724891220656436, "grad_norm": 2.5782339572906494, "learning_rate": 0.000196034706244382, "loss": 1.5972, "step": 3274 }, { "epoch": 0.11728472433613264, "grad_norm": 1.4497991800308228, "learning_rate": 0.0001960314717087087, "loss": 1.4209, "step": 3275 }, { "epoch": 0.11732053646570094, "grad_norm": 1.4412992000579834, "learning_rate": 0.00019602823588105585, "loss": 1.517, "step": 3276 }, { "epoch": 0.11735634859526922, "grad_norm": 1.4440698623657227, "learning_rate": 0.000196024998761467, "loss": 1.5662, "step": 3277 }, { "epoch": 0.1173921607248375, "grad_norm": 3.968775987625122, "learning_rate": 0.00019602176034998556, "loss": 2.1371, "step": 3278 }, { "epoch": 0.11742797285440579, "grad_norm": 1.4765177965164185, "learning_rate": 0.00019601852064665524, "loss": 1.6333, "step": 3279 }, { "epoch": 0.11746378498397407, "grad_norm": 1.5928294658660889, "learning_rate": 0.0001960152796515196, "loss": 1.6321, "step": 3280 }, { "epoch": 0.11749959711354235, "grad_norm": 1.4952478408813477, "learning_rate": 0.00019601203736462219, "loss": 1.7204, "step": 3281 }, { "epoch": 0.11753540924311064, "grad_norm": 2.540161609649658, "learning_rate": 0.00019600879378600666, "loss": 1.6557, "step": 3282 }, { "epoch": 0.11757122137267893, "grad_norm": 1.3990188837051392, "learning_rate": 0.0001960055489157167, "loss": 1.6378, "step": 3283 }, { "epoch": 0.11760703350224722, "grad_norm": 1.4970078468322754, "learning_rate": 0.00019600230275379588, "loss": 1.7807, "step": 3284 }, { "epoch": 0.1176428456318155, "grad_norm": 1.9799847602844238, "learning_rate": 0.0001959990553002879, "loss": 1.6429, "step": 3285 }, { "epoch": 0.11767865776138378, "grad_norm": 1.5458717346191406, "learning_rate": 0.0001959958065552365, "loss": 1.8378, "step": 3286 }, { "epoch": 0.11771446989095206, "grad_norm": 1.1473616361618042, "learning_rate": 0.0001959925565186853, "loss": 1.4595, "step": 3287 }, { "epoch": 0.11775028202052035, "grad_norm": 1.4953563213348389, "learning_rate": 0.00019598930519067813, "loss": 1.6126, "step": 3288 }, { "epoch": 0.11778609415008863, "grad_norm": 1.4678362607955933, "learning_rate": 0.00019598605257125864, "loss": 1.492, "step": 3289 }, { "epoch": 0.11782190627965693, "grad_norm": 2.026249408721924, "learning_rate": 0.0001959827986604706, "loss": 1.6692, "step": 3290 }, { "epoch": 0.11785771840922521, "grad_norm": 1.462710976600647, "learning_rate": 0.00019597954345835787, "loss": 1.4171, "step": 3291 }, { "epoch": 0.11789353053879349, "grad_norm": 1.6118189096450806, "learning_rate": 0.00019597628696496418, "loss": 1.7512, "step": 3292 }, { "epoch": 0.11792934266836178, "grad_norm": 1.621272087097168, "learning_rate": 0.0001959730291803333, "loss": 1.9068, "step": 3293 }, { "epoch": 0.11796515479793006, "grad_norm": 1.9732425212860107, "learning_rate": 0.00019596977010450915, "loss": 1.4781, "step": 3294 }, { "epoch": 0.11800096692749834, "grad_norm": 1.2705248594284058, "learning_rate": 0.00019596650973753555, "loss": 1.9466, "step": 3295 }, { "epoch": 0.11803677905706662, "grad_norm": 2.1659154891967773, "learning_rate": 0.00019596324807945632, "loss": 1.526, "step": 3296 }, { "epoch": 0.1180725911866349, "grad_norm": 1.6250512599945068, "learning_rate": 0.00019595998513031537, "loss": 1.6662, "step": 3297 }, { "epoch": 0.1181084033162032, "grad_norm": 3.29927659034729, "learning_rate": 0.00019595672089015663, "loss": 1.7103, "step": 3298 }, { "epoch": 0.11814421544577149, "grad_norm": 1.6981123685836792, "learning_rate": 0.00019595345535902394, "loss": 1.2357, "step": 3299 }, { "epoch": 0.11818002757533977, "grad_norm": 1.8738186359405518, "learning_rate": 0.0001959501885369613, "loss": 1.6669, "step": 3300 }, { "epoch": 0.11821583970490805, "grad_norm": 2.400287628173828, "learning_rate": 0.00019594692042401263, "loss": 1.6469, "step": 3301 }, { "epoch": 0.11825165183447633, "grad_norm": 2.324725389480591, "learning_rate": 0.00019594365102022193, "loss": 1.8658, "step": 3302 }, { "epoch": 0.11828746396404462, "grad_norm": 1.470152735710144, "learning_rate": 0.00019594038032563315, "loss": 1.7331, "step": 3303 }, { "epoch": 0.1183232760936129, "grad_norm": 1.8536076545715332, "learning_rate": 0.0001959371083402903, "loss": 1.6881, "step": 3304 }, { "epoch": 0.1183590882231812, "grad_norm": 1.580949306488037, "learning_rate": 0.00019593383506423743, "loss": 1.5505, "step": 3305 }, { "epoch": 0.11839490035274948, "grad_norm": 1.4899119138717651, "learning_rate": 0.00019593056049751852, "loss": 1.5014, "step": 3306 }, { "epoch": 0.11843071248231776, "grad_norm": 1.3627156019210815, "learning_rate": 0.0001959272846401777, "loss": 1.502, "step": 3307 }, { "epoch": 0.11846652461188605, "grad_norm": 2.0167927742004395, "learning_rate": 0.000195924007492259, "loss": 1.6057, "step": 3308 }, { "epoch": 0.11850233674145433, "grad_norm": 2.2905960083007812, "learning_rate": 0.00019592072905380648, "loss": 1.6687, "step": 3309 }, { "epoch": 0.11853814887102261, "grad_norm": 2.684022903442383, "learning_rate": 0.00019591744932486428, "loss": 1.6046, "step": 3310 }, { "epoch": 0.1185739610005909, "grad_norm": 1.62175452709198, "learning_rate": 0.00019591416830547657, "loss": 1.5254, "step": 3311 }, { "epoch": 0.11860977313015919, "grad_norm": 1.9423574209213257, "learning_rate": 0.0001959108859956874, "loss": 1.6332, "step": 3312 }, { "epoch": 0.11864558525972747, "grad_norm": 2.0534324645996094, "learning_rate": 0.00019590760239554097, "loss": 1.6084, "step": 3313 }, { "epoch": 0.11868139738929576, "grad_norm": 1.9046193361282349, "learning_rate": 0.00019590431750508153, "loss": 1.585, "step": 3314 }, { "epoch": 0.11871720951886404, "grad_norm": 1.8338521718978882, "learning_rate": 0.00019590103132435314, "loss": 1.8236, "step": 3315 }, { "epoch": 0.11875302164843232, "grad_norm": 1.7030360698699951, "learning_rate": 0.00019589774385340007, "loss": 1.716, "step": 3316 }, { "epoch": 0.1187888337780006, "grad_norm": 2.5421273708343506, "learning_rate": 0.0001958944550922666, "loss": 1.6587, "step": 3317 }, { "epoch": 0.11882464590756889, "grad_norm": 1.5858945846557617, "learning_rate": 0.0001958911650409969, "loss": 1.4128, "step": 3318 }, { "epoch": 0.11886045803713718, "grad_norm": 1.6112788915634155, "learning_rate": 0.0001958878736996353, "loss": 1.5822, "step": 3319 }, { "epoch": 0.11889627016670547, "grad_norm": 1.3577380180358887, "learning_rate": 0.00019588458106822602, "loss": 1.5731, "step": 3320 }, { "epoch": 0.11893208229627375, "grad_norm": 1.960248351097107, "learning_rate": 0.00019588128714681337, "loss": 1.6318, "step": 3321 }, { "epoch": 0.11896789442584203, "grad_norm": 1.8858251571655273, "learning_rate": 0.0001958779919354417, "loss": 1.8675, "step": 3322 }, { "epoch": 0.11900370655541032, "grad_norm": 1.6242177486419678, "learning_rate": 0.00019587469543415532, "loss": 1.5496, "step": 3323 }, { "epoch": 0.1190395186849786, "grad_norm": 1.738443374633789, "learning_rate": 0.00019587139764299857, "loss": 1.614, "step": 3324 }, { "epoch": 0.11907533081454688, "grad_norm": 1.9253852367401123, "learning_rate": 0.00019586809856201586, "loss": 1.8159, "step": 3325 }, { "epoch": 0.11911114294411518, "grad_norm": 1.6207756996154785, "learning_rate": 0.00019586479819125153, "loss": 1.3749, "step": 3326 }, { "epoch": 0.11914695507368346, "grad_norm": 1.6630465984344482, "learning_rate": 0.00019586149653074997, "loss": 1.3199, "step": 3327 }, { "epoch": 0.11918276720325174, "grad_norm": 1.2815748453140259, "learning_rate": 0.00019585819358055567, "loss": 1.6019, "step": 3328 }, { "epoch": 0.11921857933282003, "grad_norm": 2.2043771743774414, "learning_rate": 0.00019585488934071302, "loss": 1.7924, "step": 3329 }, { "epoch": 0.11925439146238831, "grad_norm": 1.9640440940856934, "learning_rate": 0.00019585158381126645, "loss": 1.4117, "step": 3330 }, { "epoch": 0.11929020359195659, "grad_norm": 1.911688208580017, "learning_rate": 0.00019584827699226044, "loss": 1.6101, "step": 3331 }, { "epoch": 0.11932601572152488, "grad_norm": 1.7753043174743652, "learning_rate": 0.00019584496888373955, "loss": 1.353, "step": 3332 }, { "epoch": 0.11936182785109317, "grad_norm": 1.6217472553253174, "learning_rate": 0.00019584165948574822, "loss": 1.6926, "step": 3333 }, { "epoch": 0.11939763998066145, "grad_norm": 1.9915986061096191, "learning_rate": 0.00019583834879833097, "loss": 1.3721, "step": 3334 }, { "epoch": 0.11943345211022974, "grad_norm": 2.1937718391418457, "learning_rate": 0.0001958350368215324, "loss": 1.526, "step": 3335 }, { "epoch": 0.11946926423979802, "grad_norm": 1.4065150022506714, "learning_rate": 0.00019583172355539698, "loss": 1.5645, "step": 3336 }, { "epoch": 0.1195050763693663, "grad_norm": 1.9046217203140259, "learning_rate": 0.00019582840899996936, "loss": 1.4022, "step": 3337 }, { "epoch": 0.11954088849893459, "grad_norm": 1.5891504287719727, "learning_rate": 0.00019582509315529408, "loss": 1.7865, "step": 3338 }, { "epoch": 0.11957670062850287, "grad_norm": 2.062664270401001, "learning_rate": 0.0001958217760214158, "loss": 1.8436, "step": 3339 }, { "epoch": 0.11961251275807117, "grad_norm": 1.4193004369735718, "learning_rate": 0.00019581845759837914, "loss": 1.5553, "step": 3340 }, { "epoch": 0.11964832488763945, "grad_norm": 1.8979054689407349, "learning_rate": 0.0001958151378862287, "loss": 1.4616, "step": 3341 }, { "epoch": 0.11968413701720773, "grad_norm": 1.922569990158081, "learning_rate": 0.00019581181688500918, "loss": 1.7021, "step": 3342 }, { "epoch": 0.11971994914677601, "grad_norm": 1.8692396879196167, "learning_rate": 0.00019580849459476527, "loss": 1.5885, "step": 3343 }, { "epoch": 0.1197557612763443, "grad_norm": 1.5603022575378418, "learning_rate": 0.00019580517101554164, "loss": 1.4791, "step": 3344 }, { "epoch": 0.11979157340591258, "grad_norm": 1.3422173261642456, "learning_rate": 0.00019580184614738299, "loss": 1.626, "step": 3345 }, { "epoch": 0.11982738553548086, "grad_norm": 1.8114674091339111, "learning_rate": 0.0001957985199903341, "loss": 1.5095, "step": 3346 }, { "epoch": 0.11986319766504915, "grad_norm": 1.594317078590393, "learning_rate": 0.00019579519254443967, "loss": 1.7669, "step": 3347 }, { "epoch": 0.11989900979461744, "grad_norm": 1.1270697116851807, "learning_rate": 0.00019579186380974455, "loss": 1.3657, "step": 3348 }, { "epoch": 0.11993482192418572, "grad_norm": 1.7906159162521362, "learning_rate": 0.0001957885337862934, "loss": 1.7797, "step": 3349 }, { "epoch": 0.11997063405375401, "grad_norm": 1.6569995880126953, "learning_rate": 0.00019578520247413113, "loss": 1.7222, "step": 3350 }, { "epoch": 0.12000644618332229, "grad_norm": 1.458554744720459, "learning_rate": 0.0001957818698733025, "loss": 1.7073, "step": 3351 }, { "epoch": 0.12004225831289057, "grad_norm": 1.9747871160507202, "learning_rate": 0.00019577853598385235, "loss": 1.495, "step": 3352 }, { "epoch": 0.12007807044245886, "grad_norm": 1.7646852731704712, "learning_rate": 0.00019577520080582556, "loss": 1.6767, "step": 3353 }, { "epoch": 0.12011388257202714, "grad_norm": 1.4243603944778442, "learning_rate": 0.00019577186433926698, "loss": 1.8968, "step": 3354 }, { "epoch": 0.12014969470159544, "grad_norm": 1.5458492040634155, "learning_rate": 0.00019576852658422146, "loss": 1.7073, "step": 3355 }, { "epoch": 0.12018550683116372, "grad_norm": 2.496417760848999, "learning_rate": 0.000195765187540734, "loss": 2.0073, "step": 3356 }, { "epoch": 0.120221318960732, "grad_norm": 2.2858903408050537, "learning_rate": 0.00019576184720884946, "loss": 1.893, "step": 3357 }, { "epoch": 0.12025713109030028, "grad_norm": 1.6809884309768677, "learning_rate": 0.00019575850558861278, "loss": 1.597, "step": 3358 }, { "epoch": 0.12029294321986857, "grad_norm": 2.288938283920288, "learning_rate": 0.00019575516268006892, "loss": 1.594, "step": 3359 }, { "epoch": 0.12032875534943685, "grad_norm": 1.5342596769332886, "learning_rate": 0.00019575181848326289, "loss": 1.4432, "step": 3360 }, { "epoch": 0.12036456747900513, "grad_norm": 1.6115968227386475, "learning_rate": 0.00019574847299823965, "loss": 1.6213, "step": 3361 }, { "epoch": 0.12040037960857343, "grad_norm": 1.2919033765792847, "learning_rate": 0.00019574512622504416, "loss": 1.6027, "step": 3362 }, { "epoch": 0.12043619173814171, "grad_norm": 1.2876076698303223, "learning_rate": 0.00019574177816372154, "loss": 1.541, "step": 3363 }, { "epoch": 0.12047200386771, "grad_norm": 1.2301689386367798, "learning_rate": 0.0001957384288143168, "loss": 1.733, "step": 3364 }, { "epoch": 0.12050781599727828, "grad_norm": 2.5293233394622803, "learning_rate": 0.000195735078176875, "loss": 1.7144, "step": 3365 }, { "epoch": 0.12054362812684656, "grad_norm": 2.2179653644561768, "learning_rate": 0.0001957317262514412, "loss": 1.673, "step": 3366 }, { "epoch": 0.12057944025641484, "grad_norm": 2.00532603263855, "learning_rate": 0.00019572837303806048, "loss": 1.7745, "step": 3367 }, { "epoch": 0.12061525238598313, "grad_norm": 1.687103271484375, "learning_rate": 0.00019572501853677802, "loss": 1.4363, "step": 3368 }, { "epoch": 0.12065106451555142, "grad_norm": 1.7787339687347412, "learning_rate": 0.0001957216627476389, "loss": 1.5124, "step": 3369 }, { "epoch": 0.1206868766451197, "grad_norm": 2.2750205993652344, "learning_rate": 0.0001957183056706883, "loss": 1.6114, "step": 3370 }, { "epoch": 0.12072268877468799, "grad_norm": 2.279995918273926, "learning_rate": 0.0001957149473059713, "loss": 1.5944, "step": 3371 }, { "epoch": 0.12075850090425627, "grad_norm": 1.3898695707321167, "learning_rate": 0.0001957115876535332, "loss": 1.7184, "step": 3372 }, { "epoch": 0.12079431303382455, "grad_norm": 1.9735623598098755, "learning_rate": 0.00019570822671341915, "loss": 1.6018, "step": 3373 }, { "epoch": 0.12083012516339284, "grad_norm": 1.697853684425354, "learning_rate": 0.00019570486448567437, "loss": 1.6601, "step": 3374 }, { "epoch": 0.12086593729296112, "grad_norm": 1.8338154554367065, "learning_rate": 0.00019570150097034404, "loss": 1.7257, "step": 3375 }, { "epoch": 0.12090174942252942, "grad_norm": 1.4777474403381348, "learning_rate": 0.0001956981361674735, "loss": 1.7592, "step": 3376 }, { "epoch": 0.1209375615520977, "grad_norm": 1.6526165008544922, "learning_rate": 0.00019569477007710798, "loss": 1.714, "step": 3377 }, { "epoch": 0.12097337368166598, "grad_norm": 1.5610233545303345, "learning_rate": 0.00019569140269929276, "loss": 1.7781, "step": 3378 }, { "epoch": 0.12100918581123427, "grad_norm": 1.9586926698684692, "learning_rate": 0.00019568803403407315, "loss": 1.7737, "step": 3379 }, { "epoch": 0.12104499794080255, "grad_norm": 1.7385483980178833, "learning_rate": 0.00019568466408149447, "loss": 1.3575, "step": 3380 }, { "epoch": 0.12108081007037083, "grad_norm": 1.839732050895691, "learning_rate": 0.00019568129284160203, "loss": 1.5505, "step": 3381 }, { "epoch": 0.12111662219993911, "grad_norm": 1.285846471786499, "learning_rate": 0.00019567792031444125, "loss": 1.4664, "step": 3382 }, { "epoch": 0.12115243432950741, "grad_norm": 1.6386572122573853, "learning_rate": 0.00019567454650005749, "loss": 1.6598, "step": 3383 }, { "epoch": 0.1211882464590757, "grad_norm": 1.7839397192001343, "learning_rate": 0.00019567117139849605, "loss": 1.7173, "step": 3384 }, { "epoch": 0.12122405858864398, "grad_norm": 1.632829189300537, "learning_rate": 0.00019566779500980247, "loss": 1.3093, "step": 3385 }, { "epoch": 0.12125987071821226, "grad_norm": 1.8660407066345215, "learning_rate": 0.00019566441733402207, "loss": 1.2346, "step": 3386 }, { "epoch": 0.12129568284778054, "grad_norm": 1.885238766670227, "learning_rate": 0.00019566103837120036, "loss": 1.3926, "step": 3387 }, { "epoch": 0.12133149497734882, "grad_norm": 1.5017540454864502, "learning_rate": 0.00019565765812138274, "loss": 1.4036, "step": 3388 }, { "epoch": 0.12136730710691711, "grad_norm": 1.5726633071899414, "learning_rate": 0.00019565427658461474, "loss": 1.624, "step": 3389 }, { "epoch": 0.1214031192364854, "grad_norm": 1.7805520296096802, "learning_rate": 0.00019565089376094184, "loss": 1.7118, "step": 3390 }, { "epoch": 0.12143893136605369, "grad_norm": 1.9089933633804321, "learning_rate": 0.0001956475096504095, "loss": 1.7371, "step": 3391 }, { "epoch": 0.12147474349562197, "grad_norm": 1.7601195573806763, "learning_rate": 0.00019564412425306338, "loss": 1.9506, "step": 3392 }, { "epoch": 0.12151055562519025, "grad_norm": 1.944966197013855, "learning_rate": 0.00019564073756894889, "loss": 1.8287, "step": 3393 }, { "epoch": 0.12154636775475854, "grad_norm": 2.336825370788574, "learning_rate": 0.00019563734959811163, "loss": 1.5147, "step": 3394 }, { "epoch": 0.12158217988432682, "grad_norm": 2.415855646133423, "learning_rate": 0.00019563396034059724, "loss": 1.4703, "step": 3395 }, { "epoch": 0.1216179920138951, "grad_norm": 1.4387824535369873, "learning_rate": 0.00019563056979645123, "loss": 1.4512, "step": 3396 }, { "epoch": 0.12165380414346338, "grad_norm": 2.0192601680755615, "learning_rate": 0.00019562717796571929, "loss": 1.536, "step": 3397 }, { "epoch": 0.12168961627303168, "grad_norm": 1.5521689653396606, "learning_rate": 0.00019562378484844697, "loss": 1.6861, "step": 3398 }, { "epoch": 0.12172542840259996, "grad_norm": 1.3953551054000854, "learning_rate": 0.00019562039044468, "loss": 1.713, "step": 3399 }, { "epoch": 0.12176124053216825, "grad_norm": 2.014117956161499, "learning_rate": 0.00019561699475446401, "loss": 1.4568, "step": 3400 }, { "epoch": 0.12179705266173653, "grad_norm": 1.4220621585845947, "learning_rate": 0.00019561359777784472, "loss": 1.4072, "step": 3401 }, { "epoch": 0.12183286479130481, "grad_norm": 1.6513803005218506, "learning_rate": 0.0001956101995148678, "loss": 1.6897, "step": 3402 }, { "epoch": 0.1218686769208731, "grad_norm": 1.61029052734375, "learning_rate": 0.00019560679996557894, "loss": 1.6601, "step": 3403 }, { "epoch": 0.12190448905044138, "grad_norm": 1.2982370853424072, "learning_rate": 0.00019560339913002396, "loss": 1.6648, "step": 3404 }, { "epoch": 0.12194030118000967, "grad_norm": 1.9422756433486938, "learning_rate": 0.00019559999700824852, "loss": 1.8368, "step": 3405 }, { "epoch": 0.12197611330957796, "grad_norm": 1.4068212509155273, "learning_rate": 0.00019559659360029845, "loss": 1.681, "step": 3406 }, { "epoch": 0.12201192543914624, "grad_norm": 2.2662994861602783, "learning_rate": 0.0001955931889062195, "loss": 1.7048, "step": 3407 }, { "epoch": 0.12204773756871452, "grad_norm": 1.8388558626174927, "learning_rate": 0.00019558978292605754, "loss": 1.4593, "step": 3408 }, { "epoch": 0.1220835496982828, "grad_norm": 2.5157718658447266, "learning_rate": 0.00019558637565985834, "loss": 1.5091, "step": 3409 }, { "epoch": 0.12211936182785109, "grad_norm": 1.798851728439331, "learning_rate": 0.00019558296710766774, "loss": 1.5104, "step": 3410 }, { "epoch": 0.12215517395741937, "grad_norm": 1.3769530057907104, "learning_rate": 0.00019557955726953163, "loss": 1.516, "step": 3411 }, { "epoch": 0.12219098608698767, "grad_norm": 1.429823398590088, "learning_rate": 0.00019557614614549586, "loss": 1.6896, "step": 3412 }, { "epoch": 0.12222679821655595, "grad_norm": 2.1437742710113525, "learning_rate": 0.00019557273373560632, "loss": 1.8087, "step": 3413 }, { "epoch": 0.12226261034612423, "grad_norm": 1.2321090698242188, "learning_rate": 0.00019556932003990892, "loss": 1.331, "step": 3414 }, { "epoch": 0.12229842247569252, "grad_norm": 1.5394012928009033, "learning_rate": 0.0001955659050584496, "loss": 1.6945, "step": 3415 }, { "epoch": 0.1223342346052608, "grad_norm": 1.67526376247406, "learning_rate": 0.0001955624887912743, "loss": 1.3599, "step": 3416 }, { "epoch": 0.12237004673482908, "grad_norm": 1.7456480264663696, "learning_rate": 0.00019555907123842902, "loss": 1.6028, "step": 3417 }, { "epoch": 0.12240585886439737, "grad_norm": 1.8807971477508545, "learning_rate": 0.00019555565239995966, "loss": 1.4512, "step": 3418 }, { "epoch": 0.12244167099396566, "grad_norm": 1.4449418783187866, "learning_rate": 0.00019555223227591225, "loss": 1.6143, "step": 3419 }, { "epoch": 0.12247748312353395, "grad_norm": 1.2141433954238892, "learning_rate": 0.0001955488108663328, "loss": 1.6618, "step": 3420 }, { "epoch": 0.12251329525310223, "grad_norm": 1.769530177116394, "learning_rate": 0.00019554538817126739, "loss": 1.5924, "step": 3421 }, { "epoch": 0.12254910738267051, "grad_norm": 1.607627272605896, "learning_rate": 0.000195541964190762, "loss": 1.64, "step": 3422 }, { "epoch": 0.1225849195122388, "grad_norm": 2.0021603107452393, "learning_rate": 0.00019553853892486273, "loss": 1.5966, "step": 3423 }, { "epoch": 0.12262073164180708, "grad_norm": 1.4065313339233398, "learning_rate": 0.00019553511237361564, "loss": 1.7312, "step": 3424 }, { "epoch": 0.12265654377137536, "grad_norm": 2.360572338104248, "learning_rate": 0.00019553168453706685, "loss": 1.6099, "step": 3425 }, { "epoch": 0.12269235590094366, "grad_norm": 1.807173728942871, "learning_rate": 0.00019552825541526247, "loss": 1.4694, "step": 3426 }, { "epoch": 0.12272816803051194, "grad_norm": 1.3030319213867188, "learning_rate": 0.00019552482500824865, "loss": 1.8069, "step": 3427 }, { "epoch": 0.12276398016008022, "grad_norm": 1.7905423641204834, "learning_rate": 0.0001955213933160715, "loss": 1.5229, "step": 3428 }, { "epoch": 0.1227997922896485, "grad_norm": 2.1524462699890137, "learning_rate": 0.00019551796033877726, "loss": 1.3083, "step": 3429 }, { "epoch": 0.12283560441921679, "grad_norm": 1.6406817436218262, "learning_rate": 0.00019551452607641205, "loss": 1.8781, "step": 3430 }, { "epoch": 0.12287141654878507, "grad_norm": 1.5875840187072754, "learning_rate": 0.0001955110905290221, "loss": 1.9099, "step": 3431 }, { "epoch": 0.12290722867835335, "grad_norm": 1.7724831104278564, "learning_rate": 0.00019550765369665362, "loss": 1.4552, "step": 3432 }, { "epoch": 0.12294304080792165, "grad_norm": 1.4344149827957153, "learning_rate": 0.00019550421557935286, "loss": 1.7283, "step": 3433 }, { "epoch": 0.12297885293748993, "grad_norm": 1.5454742908477783, "learning_rate": 0.00019550077617716606, "loss": 1.7259, "step": 3434 }, { "epoch": 0.12301466506705822, "grad_norm": 1.3281104564666748, "learning_rate": 0.00019549733549013954, "loss": 1.674, "step": 3435 }, { "epoch": 0.1230504771966265, "grad_norm": 1.5898027420043945, "learning_rate": 0.0001954938935183195, "loss": 1.7844, "step": 3436 }, { "epoch": 0.12308628932619478, "grad_norm": 1.7751864194869995, "learning_rate": 0.00019549045026175232, "loss": 1.6153, "step": 3437 }, { "epoch": 0.12312210145576306, "grad_norm": 1.5239864587783813, "learning_rate": 0.00019548700572048433, "loss": 1.5856, "step": 3438 }, { "epoch": 0.12315791358533135, "grad_norm": 1.585067629814148, "learning_rate": 0.00019548355989456182, "loss": 1.5034, "step": 3439 }, { "epoch": 0.12319372571489964, "grad_norm": 1.449587345123291, "learning_rate": 0.0001954801127840312, "loss": 1.4557, "step": 3440 }, { "epoch": 0.12322953784446793, "grad_norm": 1.8552396297454834, "learning_rate": 0.00019547666438893879, "loss": 1.4238, "step": 3441 }, { "epoch": 0.12326534997403621, "grad_norm": 1.5274230241775513, "learning_rate": 0.00019547321470933103, "loss": 1.4542, "step": 3442 }, { "epoch": 0.12330116210360449, "grad_norm": 1.3685420751571655, "learning_rate": 0.00019546976374525433, "loss": 1.5404, "step": 3443 }, { "epoch": 0.12333697423317277, "grad_norm": 1.6194121837615967, "learning_rate": 0.0001954663114967551, "loss": 1.2244, "step": 3444 }, { "epoch": 0.12337278636274106, "grad_norm": 1.4230560064315796, "learning_rate": 0.0001954628579638798, "loss": 1.0127, "step": 3445 }, { "epoch": 0.12340859849230934, "grad_norm": 1.3849716186523438, "learning_rate": 0.0001954594031466749, "loss": 1.425, "step": 3446 }, { "epoch": 0.12344441062187762, "grad_norm": 1.335742473602295, "learning_rate": 0.00019545594704518682, "loss": 1.7514, "step": 3447 }, { "epoch": 0.12348022275144592, "grad_norm": 1.256226897239685, "learning_rate": 0.00019545248965946216, "loss": 1.6755, "step": 3448 }, { "epoch": 0.1235160348810142, "grad_norm": 1.8748904466629028, "learning_rate": 0.00019544903098954732, "loss": 1.516, "step": 3449 }, { "epoch": 0.12355184701058249, "grad_norm": 1.9898053407669067, "learning_rate": 0.0001954455710354889, "loss": 1.5757, "step": 3450 }, { "epoch": 0.12358765914015077, "grad_norm": 2.141721725463867, "learning_rate": 0.00019544210979733343, "loss": 1.9781, "step": 3451 }, { "epoch": 0.12362347126971905, "grad_norm": 1.461661696434021, "learning_rate": 0.0001954386472751275, "loss": 1.8818, "step": 3452 }, { "epoch": 0.12365928339928733, "grad_norm": 2.044703245162964, "learning_rate": 0.0001954351834689177, "loss": 1.8225, "step": 3453 }, { "epoch": 0.12369509552885562, "grad_norm": 2.4182326793670654, "learning_rate": 0.0001954317183787506, "loss": 1.6916, "step": 3454 }, { "epoch": 0.12373090765842391, "grad_norm": 1.5588754415512085, "learning_rate": 0.00019542825200467279, "loss": 1.7322, "step": 3455 }, { "epoch": 0.1237667197879922, "grad_norm": 1.1768525838851929, "learning_rate": 0.00019542478434673096, "loss": 1.5594, "step": 3456 }, { "epoch": 0.12380253191756048, "grad_norm": 1.7234669923782349, "learning_rate": 0.00019542131540497174, "loss": 1.7051, "step": 3457 }, { "epoch": 0.12383834404712876, "grad_norm": 1.9066272974014282, "learning_rate": 0.00019541784517944182, "loss": 1.7679, "step": 3458 }, { "epoch": 0.12387415617669705, "grad_norm": 1.688675045967102, "learning_rate": 0.0001954143736701879, "loss": 1.2632, "step": 3459 }, { "epoch": 0.12390996830626533, "grad_norm": 1.6447263956069946, "learning_rate": 0.0001954109008772566, "loss": 1.7467, "step": 3460 }, { "epoch": 0.12394578043583361, "grad_norm": 1.7118195295333862, "learning_rate": 0.00019540742680069473, "loss": 1.6544, "step": 3461 }, { "epoch": 0.12398159256540191, "grad_norm": 2.0889880657196045, "learning_rate": 0.000195403951440549, "loss": 1.8585, "step": 3462 }, { "epoch": 0.12401740469497019, "grad_norm": 1.5095477104187012, "learning_rate": 0.00019540047479686616, "loss": 1.5888, "step": 3463 }, { "epoch": 0.12405321682453847, "grad_norm": 1.3710155487060547, "learning_rate": 0.00019539699686969302, "loss": 1.4073, "step": 3464 }, { "epoch": 0.12408902895410676, "grad_norm": 1.261867642402649, "learning_rate": 0.0001953935176590763, "loss": 1.7322, "step": 3465 }, { "epoch": 0.12412484108367504, "grad_norm": 1.395007848739624, "learning_rate": 0.00019539003716506287, "loss": 1.5827, "step": 3466 }, { "epoch": 0.12416065321324332, "grad_norm": 1.8088184595108032, "learning_rate": 0.0001953865553876995, "loss": 2.0731, "step": 3467 }, { "epoch": 0.1241964653428116, "grad_norm": 2.2658514976501465, "learning_rate": 0.00019538307232703313, "loss": 1.6118, "step": 3468 }, { "epoch": 0.1242322774723799, "grad_norm": 1.3344290256500244, "learning_rate": 0.0001953795879831105, "loss": 1.6023, "step": 3469 }, { "epoch": 0.12426808960194818, "grad_norm": 1.413332462310791, "learning_rate": 0.00019537610235597857, "loss": 1.5516, "step": 3470 }, { "epoch": 0.12430390173151647, "grad_norm": 1.3064593076705933, "learning_rate": 0.00019537261544568421, "loss": 1.3519, "step": 3471 }, { "epoch": 0.12433971386108475, "grad_norm": 4.2692060470581055, "learning_rate": 0.00019536912725227432, "loss": 1.7083, "step": 3472 }, { "epoch": 0.12437552599065303, "grad_norm": 1.505118489265442, "learning_rate": 0.00019536563777579585, "loss": 1.597, "step": 3473 }, { "epoch": 0.12441133812022132, "grad_norm": 1.7473787069320679, "learning_rate": 0.0001953621470162957, "loss": 1.7033, "step": 3474 }, { "epoch": 0.1244471502497896, "grad_norm": 1.8221546411514282, "learning_rate": 0.00019535865497382094, "loss": 1.4684, "step": 3475 }, { "epoch": 0.1244829623793579, "grad_norm": 1.8060365915298462, "learning_rate": 0.00019535516164841842, "loss": 1.6477, "step": 3476 }, { "epoch": 0.12451877450892618, "grad_norm": 1.790130853652954, "learning_rate": 0.00019535166704013522, "loss": 1.5853, "step": 3477 }, { "epoch": 0.12455458663849446, "grad_norm": 2.9032599925994873, "learning_rate": 0.00019534817114901833, "loss": 1.7533, "step": 3478 }, { "epoch": 0.12459039876806274, "grad_norm": 1.4391133785247803, "learning_rate": 0.0001953446739751148, "loss": 1.6817, "step": 3479 }, { "epoch": 0.12462621089763103, "grad_norm": 1.8639107942581177, "learning_rate": 0.00019534117551847166, "loss": 1.6716, "step": 3480 }, { "epoch": 0.12466202302719931, "grad_norm": 1.7020066976547241, "learning_rate": 0.000195337675779136, "loss": 1.7672, "step": 3481 }, { "epoch": 0.12469783515676759, "grad_norm": 2.505441188812256, "learning_rate": 0.00019533417475715487, "loss": 1.6561, "step": 3482 }, { "epoch": 0.12473364728633589, "grad_norm": 1.5666439533233643, "learning_rate": 0.0001953306724525754, "loss": 1.6065, "step": 3483 }, { "epoch": 0.12476945941590417, "grad_norm": 1.5428463220596313, "learning_rate": 0.00019532716886544468, "loss": 1.4111, "step": 3484 }, { "epoch": 0.12480527154547245, "grad_norm": 1.5650322437286377, "learning_rate": 0.0001953236639958099, "loss": 1.5319, "step": 3485 }, { "epoch": 0.12484108367504074, "grad_norm": 1.1498960256576538, "learning_rate": 0.00019532015784371818, "loss": 1.8039, "step": 3486 }, { "epoch": 0.12487689580460902, "grad_norm": 1.6411759853363037, "learning_rate": 0.00019531665040921668, "loss": 1.812, "step": 3487 }, { "epoch": 0.1249127079341773, "grad_norm": 1.3807140588760376, "learning_rate": 0.00019531314169235259, "loss": 1.1548, "step": 3488 }, { "epoch": 0.12494852006374559, "grad_norm": 1.5563311576843262, "learning_rate": 0.00019530963169317312, "loss": 1.2558, "step": 3489 }, { "epoch": 0.12498433219331388, "grad_norm": 1.7699103355407715, "learning_rate": 0.0001953061204117255, "loss": 1.6063, "step": 3490 }, { "epoch": 0.12502014432288217, "grad_norm": 1.5972926616668701, "learning_rate": 0.00019530260784805697, "loss": 1.3708, "step": 3491 }, { "epoch": 0.12505595645245043, "grad_norm": 1.3855398893356323, "learning_rate": 0.00019529909400221475, "loss": 1.3582, "step": 3492 }, { "epoch": 0.12509176858201873, "grad_norm": 1.3573073148727417, "learning_rate": 0.00019529557887424618, "loss": 1.8298, "step": 3493 }, { "epoch": 0.12512758071158703, "grad_norm": 1.5848575830459595, "learning_rate": 0.00019529206246419854, "loss": 1.5766, "step": 3494 }, { "epoch": 0.1251633928411553, "grad_norm": 1.3935266733169556, "learning_rate": 0.00019528854477211908, "loss": 1.5095, "step": 3495 }, { "epoch": 0.1251992049707236, "grad_norm": 1.3671562671661377, "learning_rate": 0.0001952850257980552, "loss": 1.5969, "step": 3496 }, { "epoch": 0.12523501710029186, "grad_norm": 1.2846484184265137, "learning_rate": 0.00019528150554205419, "loss": 1.4534, "step": 3497 }, { "epoch": 0.12527082922986016, "grad_norm": 1.6687588691711426, "learning_rate": 0.00019527798400416338, "loss": 1.5611, "step": 3498 }, { "epoch": 0.12530664135942843, "grad_norm": 1.379241704940796, "learning_rate": 0.00019527446118443025, "loss": 1.4491, "step": 3499 }, { "epoch": 0.12534245348899672, "grad_norm": 1.4438045024871826, "learning_rate": 0.00019527093708290215, "loss": 1.6858, "step": 3500 }, { "epoch": 0.12537826561856502, "grad_norm": 1.3740583658218384, "learning_rate": 0.00019526741169962643, "loss": 1.8549, "step": 3501 }, { "epoch": 0.1254140777481333, "grad_norm": 2.178952932357788, "learning_rate": 0.00019526388503465062, "loss": 1.8759, "step": 3502 }, { "epoch": 0.1254498898777016, "grad_norm": 1.6737861633300781, "learning_rate": 0.00019526035708802207, "loss": 1.5266, "step": 3503 }, { "epoch": 0.12548570200726986, "grad_norm": 1.7721725702285767, "learning_rate": 0.00019525682785978833, "loss": 1.7037, "step": 3504 }, { "epoch": 0.12552151413683815, "grad_norm": 1.7052780389785767, "learning_rate": 0.00019525329734999683, "loss": 1.7986, "step": 3505 }, { "epoch": 0.12555732626640642, "grad_norm": 1.8097865581512451, "learning_rate": 0.0001952497655586951, "loss": 1.397, "step": 3506 }, { "epoch": 0.12559313839597472, "grad_norm": 1.6215720176696777, "learning_rate": 0.00019524623248593062, "loss": 1.4687, "step": 3507 }, { "epoch": 0.12562895052554302, "grad_norm": 1.4546492099761963, "learning_rate": 0.00019524269813175096, "loss": 1.3458, "step": 3508 }, { "epoch": 0.12566476265511128, "grad_norm": 2.1986570358276367, "learning_rate": 0.00019523916249620363, "loss": 1.5225, "step": 3509 }, { "epoch": 0.12570057478467958, "grad_norm": 3.118424892425537, "learning_rate": 0.0001952356255793362, "loss": 1.679, "step": 3510 }, { "epoch": 0.12573638691424785, "grad_norm": 1.657961368560791, "learning_rate": 0.00019523208738119632, "loss": 1.482, "step": 3511 }, { "epoch": 0.12577219904381615, "grad_norm": 1.8398610353469849, "learning_rate": 0.00019522854790183152, "loss": 1.649, "step": 3512 }, { "epoch": 0.12580801117338442, "grad_norm": 1.3558249473571777, "learning_rate": 0.00019522500714128942, "loss": 1.5596, "step": 3513 }, { "epoch": 0.1258438233029527, "grad_norm": 1.444551706314087, "learning_rate": 0.0001952214650996177, "loss": 1.3201, "step": 3514 }, { "epoch": 0.125879635432521, "grad_norm": 1.4728481769561768, "learning_rate": 0.000195217921776864, "loss": 1.6087, "step": 3515 }, { "epoch": 0.12591544756208928, "grad_norm": 1.3532110452651978, "learning_rate": 0.000195214377173076, "loss": 1.6132, "step": 3516 }, { "epoch": 0.12595125969165757, "grad_norm": 1.7189764976501465, "learning_rate": 0.00019521083128830137, "loss": 1.508, "step": 3517 }, { "epoch": 0.12598707182122584, "grad_norm": 1.3803995847702026, "learning_rate": 0.0001952072841225878, "loss": 1.7025, "step": 3518 }, { "epoch": 0.12602288395079414, "grad_norm": 1.9265297651290894, "learning_rate": 0.00019520373567598304, "loss": 1.4115, "step": 3519 }, { "epoch": 0.1260586960803624, "grad_norm": 2.2055609226226807, "learning_rate": 0.0001952001859485348, "loss": 1.8266, "step": 3520 }, { "epoch": 0.1260945082099307, "grad_norm": 1.7643824815750122, "learning_rate": 0.0001951966349402909, "loss": 1.5893, "step": 3521 }, { "epoch": 0.12613032033949897, "grad_norm": 2.026763677597046, "learning_rate": 0.00019519308265129903, "loss": 1.4778, "step": 3522 }, { "epoch": 0.12616613246906727, "grad_norm": 1.664231538772583, "learning_rate": 0.00019518952908160705, "loss": 1.6736, "step": 3523 }, { "epoch": 0.12620194459863557, "grad_norm": 1.765260934829712, "learning_rate": 0.00019518597423126273, "loss": 1.665, "step": 3524 }, { "epoch": 0.12623775672820384, "grad_norm": 2.3637137413024902, "learning_rate": 0.0001951824181003139, "loss": 1.3418, "step": 3525 }, { "epoch": 0.12627356885777213, "grad_norm": 1.4499986171722412, "learning_rate": 0.00019517886068880843, "loss": 1.6264, "step": 3526 }, { "epoch": 0.1263093809873404, "grad_norm": 1.7675611972808838, "learning_rate": 0.00019517530199679415, "loss": 1.6114, "step": 3527 }, { "epoch": 0.1263451931169087, "grad_norm": 1.6797280311584473, "learning_rate": 0.00019517174202431895, "loss": 1.4893, "step": 3528 }, { "epoch": 0.12638100524647697, "grad_norm": 1.2751450538635254, "learning_rate": 0.00019516818077143071, "loss": 1.6812, "step": 3529 }, { "epoch": 0.12641681737604527, "grad_norm": 2.196251630783081, "learning_rate": 0.00019516461823817737, "loss": 1.3997, "step": 3530 }, { "epoch": 0.12645262950561356, "grad_norm": 2.467259645462036, "learning_rate": 0.00019516105442460684, "loss": 1.4916, "step": 3531 }, { "epoch": 0.12648844163518183, "grad_norm": 1.8031963109970093, "learning_rate": 0.0001951574893307671, "loss": 1.8051, "step": 3532 }, { "epoch": 0.12652425376475013, "grad_norm": 1.3685870170593262, "learning_rate": 0.00019515392295670604, "loss": 1.4656, "step": 3533 }, { "epoch": 0.1265600658943184, "grad_norm": 1.3247365951538086, "learning_rate": 0.00019515035530247172, "loss": 1.5153, "step": 3534 }, { "epoch": 0.1265958780238867, "grad_norm": 1.341870665550232, "learning_rate": 0.0001951467863681121, "loss": 1.5531, "step": 3535 }, { "epoch": 0.12663169015345496, "grad_norm": 1.5447837114334106, "learning_rate": 0.00019514321615367517, "loss": 1.8168, "step": 3536 }, { "epoch": 0.12666750228302326, "grad_norm": 1.3823332786560059, "learning_rate": 0.000195139644659209, "loss": 1.6153, "step": 3537 }, { "epoch": 0.12670331441259156, "grad_norm": 1.7624273300170898, "learning_rate": 0.00019513607188476168, "loss": 1.5328, "step": 3538 }, { "epoch": 0.12673912654215982, "grad_norm": 1.4588311910629272, "learning_rate": 0.00019513249783038118, "loss": 1.7005, "step": 3539 }, { "epoch": 0.12677493867172812, "grad_norm": 2.036802291870117, "learning_rate": 0.00019512892249611566, "loss": 1.8805, "step": 3540 }, { "epoch": 0.1268107508012964, "grad_norm": 2.2579665184020996, "learning_rate": 0.00019512534588201318, "loss": 1.3699, "step": 3541 }, { "epoch": 0.1268465629308647, "grad_norm": 2.078040599822998, "learning_rate": 0.00019512176798812189, "loss": 1.6924, "step": 3542 }, { "epoch": 0.12688237506043296, "grad_norm": 1.5221152305603027, "learning_rate": 0.0001951181888144899, "loss": 1.8049, "step": 3543 }, { "epoch": 0.12691818719000125, "grad_norm": 1.3810714483261108, "learning_rate": 0.00019511460836116537, "loss": 1.6305, "step": 3544 }, { "epoch": 0.12695399931956955, "grad_norm": 1.5885570049285889, "learning_rate": 0.00019511102662819648, "loss": 1.6947, "step": 3545 }, { "epoch": 0.12698981144913782, "grad_norm": 2.6954009532928467, "learning_rate": 0.0001951074436156314, "loss": 1.5527, "step": 3546 }, { "epoch": 0.12702562357870611, "grad_norm": 2.2410197257995605, "learning_rate": 0.00019510385932351837, "loss": 1.5252, "step": 3547 }, { "epoch": 0.12706143570827438, "grad_norm": 1.5985091924667358, "learning_rate": 0.00019510027375190556, "loss": 1.6583, "step": 3548 }, { "epoch": 0.12709724783784268, "grad_norm": 1.7822718620300293, "learning_rate": 0.00019509668690084126, "loss": 1.5017, "step": 3549 }, { "epoch": 0.12713305996741095, "grad_norm": 1.6479328870773315, "learning_rate": 0.00019509309877037369, "loss": 1.4035, "step": 3550 }, { "epoch": 0.12716887209697925, "grad_norm": 2.219636917114258, "learning_rate": 0.00019508950936055115, "loss": 1.8554, "step": 3551 }, { "epoch": 0.12720468422654754, "grad_norm": 2.312058925628662, "learning_rate": 0.0001950859186714219, "loss": 1.6133, "step": 3552 }, { "epoch": 0.1272404963561158, "grad_norm": 1.3517247438430786, "learning_rate": 0.00019508232670303427, "loss": 1.5924, "step": 3553 }, { "epoch": 0.1272763084856841, "grad_norm": 1.835817813873291, "learning_rate": 0.00019507873345543658, "loss": 1.4991, "step": 3554 }, { "epoch": 0.12731212061525238, "grad_norm": 1.174762487411499, "learning_rate": 0.00019507513892867717, "loss": 1.5491, "step": 3555 }, { "epoch": 0.12734793274482067, "grad_norm": 1.6903138160705566, "learning_rate": 0.0001950715431228044, "loss": 1.526, "step": 3556 }, { "epoch": 0.12738374487438894, "grad_norm": 2.1673574447631836, "learning_rate": 0.0001950679460378667, "loss": 1.7603, "step": 3557 }, { "epoch": 0.12741955700395724, "grad_norm": 1.705032229423523, "learning_rate": 0.00019506434767391237, "loss": 1.7594, "step": 3558 }, { "epoch": 0.12745536913352554, "grad_norm": 1.64605712890625, "learning_rate": 0.00019506074803098987, "loss": 1.9528, "step": 3559 }, { "epoch": 0.1274911812630938, "grad_norm": 1.3241467475891113, "learning_rate": 0.00019505714710914764, "loss": 1.5991, "step": 3560 }, { "epoch": 0.1275269933926621, "grad_norm": 2.728872299194336, "learning_rate": 0.0001950535449084341, "loss": 1.6143, "step": 3561 }, { "epoch": 0.12756280552223037, "grad_norm": 1.7435359954833984, "learning_rate": 0.0001950499414288977, "loss": 1.4116, "step": 3562 }, { "epoch": 0.12759861765179867, "grad_norm": 1.7278475761413574, "learning_rate": 0.000195046336670587, "loss": 1.6202, "step": 3563 }, { "epoch": 0.12763442978136694, "grad_norm": 1.4856594800949097, "learning_rate": 0.0001950427306335504, "loss": 1.85, "step": 3564 }, { "epoch": 0.12767024191093523, "grad_norm": 1.2689180374145508, "learning_rate": 0.00019503912331783648, "loss": 1.5936, "step": 3565 }, { "epoch": 0.12770605404050353, "grad_norm": 2.304764747619629, "learning_rate": 0.00019503551472349373, "loss": 1.5894, "step": 3566 }, { "epoch": 0.1277418661700718, "grad_norm": 1.8257946968078613, "learning_rate": 0.0001950319048505707, "loss": 1.3694, "step": 3567 }, { "epoch": 0.1277776782996401, "grad_norm": 1.283836841583252, "learning_rate": 0.000195028293699116, "loss": 1.537, "step": 3568 }, { "epoch": 0.12781349042920837, "grad_norm": 2.9690396785736084, "learning_rate": 0.0001950246812691782, "loss": 1.5115, "step": 3569 }, { "epoch": 0.12784930255877666, "grad_norm": 2.2096030712127686, "learning_rate": 0.00019502106756080583, "loss": 1.8623, "step": 3570 }, { "epoch": 0.12788511468834493, "grad_norm": 1.6043503284454346, "learning_rate": 0.00019501745257404762, "loss": 1.5075, "step": 3571 }, { "epoch": 0.12792092681791323, "grad_norm": 1.8550293445587158, "learning_rate": 0.00019501383630895211, "loss": 1.6319, "step": 3572 }, { "epoch": 0.12795673894748152, "grad_norm": 1.1330084800720215, "learning_rate": 0.00019501021876556802, "loss": 1.7045, "step": 3573 }, { "epoch": 0.1279925510770498, "grad_norm": 3.4932503700256348, "learning_rate": 0.00019500659994394398, "loss": 1.5967, "step": 3574 }, { "epoch": 0.1280283632066181, "grad_norm": 2.301506280899048, "learning_rate": 0.0001950029798441287, "loss": 1.7702, "step": 3575 }, { "epoch": 0.12806417533618636, "grad_norm": 1.4132013320922852, "learning_rate": 0.00019499935846617084, "loss": 1.8051, "step": 3576 }, { "epoch": 0.12809998746575466, "grad_norm": 1.813435435295105, "learning_rate": 0.0001949957358101192, "loss": 1.5849, "step": 3577 }, { "epoch": 0.12813579959532292, "grad_norm": 1.2465287446975708, "learning_rate": 0.00019499211187602242, "loss": 1.5895, "step": 3578 }, { "epoch": 0.12817161172489122, "grad_norm": 1.4146263599395752, "learning_rate": 0.0001949884866639293, "loss": 1.9067, "step": 3579 }, { "epoch": 0.12820742385445952, "grad_norm": 1.7771110534667969, "learning_rate": 0.00019498486017388865, "loss": 1.59, "step": 3580 }, { "epoch": 0.1282432359840278, "grad_norm": 2.2220492362976074, "learning_rate": 0.00019498123240594924, "loss": 1.4147, "step": 3581 }, { "epoch": 0.12827904811359608, "grad_norm": 2.194570779800415, "learning_rate": 0.00019497760336015984, "loss": 1.7702, "step": 3582 }, { "epoch": 0.12831486024316435, "grad_norm": 1.2068736553192139, "learning_rate": 0.0001949739730365693, "loss": 1.6503, "step": 3583 }, { "epoch": 0.12835067237273265, "grad_norm": 1.3843286037445068, "learning_rate": 0.0001949703414352265, "loss": 1.6336, "step": 3584 }, { "epoch": 0.12838648450230092, "grad_norm": 1.7587592601776123, "learning_rate": 0.0001949667085561802, "loss": 1.6254, "step": 3585 }, { "epoch": 0.12842229663186921, "grad_norm": 2.3941118717193604, "learning_rate": 0.00019496307439947937, "loss": 1.7615, "step": 3586 }, { "epoch": 0.1284581087614375, "grad_norm": 1.4519942998886108, "learning_rate": 0.00019495943896517286, "loss": 1.3183, "step": 3587 }, { "epoch": 0.12849392089100578, "grad_norm": 1.6823445558547974, "learning_rate": 0.0001949558022533096, "loss": 1.4198, "step": 3588 }, { "epoch": 0.12852973302057408, "grad_norm": 1.5577532052993774, "learning_rate": 0.00019495216426393847, "loss": 1.6609, "step": 3589 }, { "epoch": 0.12856554515014235, "grad_norm": 1.4570772647857666, "learning_rate": 0.0001949485249971085, "loss": 1.5659, "step": 3590 }, { "epoch": 0.12860135727971064, "grad_norm": 1.4191895723342896, "learning_rate": 0.00019494488445286856, "loss": 1.4708, "step": 3591 }, { "epoch": 0.1286371694092789, "grad_norm": 1.8212336301803589, "learning_rate": 0.00019494124263126766, "loss": 1.5248, "step": 3592 }, { "epoch": 0.1286729815388472, "grad_norm": 1.7171404361724854, "learning_rate": 0.00019493759953235484, "loss": 1.7069, "step": 3593 }, { "epoch": 0.1287087936684155, "grad_norm": 1.546820878982544, "learning_rate": 0.00019493395515617908, "loss": 1.3911, "step": 3594 }, { "epoch": 0.12874460579798377, "grad_norm": 1.5426114797592163, "learning_rate": 0.00019493030950278937, "loss": 1.7267, "step": 3595 }, { "epoch": 0.12878041792755207, "grad_norm": 1.4243674278259277, "learning_rate": 0.00019492666257223484, "loss": 1.8226, "step": 3596 }, { "epoch": 0.12881623005712034, "grad_norm": 1.364989161491394, "learning_rate": 0.00019492301436456447, "loss": 1.6399, "step": 3597 }, { "epoch": 0.12885204218668864, "grad_norm": 2.2802083492279053, "learning_rate": 0.00019491936487982744, "loss": 1.6192, "step": 3598 }, { "epoch": 0.1288878543162569, "grad_norm": 1.3957548141479492, "learning_rate": 0.00019491571411807274, "loss": 1.6369, "step": 3599 }, { "epoch": 0.1289236664458252, "grad_norm": 1.914682149887085, "learning_rate": 0.00019491206207934955, "loss": 1.7215, "step": 3600 }, { "epoch": 0.1289594785753935, "grad_norm": 2.625183582305908, "learning_rate": 0.00019490840876370703, "loss": 1.367, "step": 3601 }, { "epoch": 0.12899529070496177, "grad_norm": 1.3641928434371948, "learning_rate": 0.00019490475417119425, "loss": 1.7807, "step": 3602 }, { "epoch": 0.12903110283453006, "grad_norm": 1.6439179182052612, "learning_rate": 0.00019490109830186042, "loss": 1.4354, "step": 3603 }, { "epoch": 0.12906691496409833, "grad_norm": 1.4001127481460571, "learning_rate": 0.00019489744115575475, "loss": 1.5427, "step": 3604 }, { "epoch": 0.12910272709366663, "grad_norm": 1.2241640090942383, "learning_rate": 0.00019489378273292643, "loss": 1.4639, "step": 3605 }, { "epoch": 0.1291385392232349, "grad_norm": 1.4079821109771729, "learning_rate": 0.00019489012303342462, "loss": 1.6541, "step": 3606 }, { "epoch": 0.1291743513528032, "grad_norm": 2.0953893661499023, "learning_rate": 0.00019488646205729864, "loss": 1.5438, "step": 3607 }, { "epoch": 0.1292101634823715, "grad_norm": 2.2923357486724854, "learning_rate": 0.00019488279980459772, "loss": 1.6912, "step": 3608 }, { "epoch": 0.12924597561193976, "grad_norm": 1.4191970825195312, "learning_rate": 0.00019487913627537108, "loss": 1.8267, "step": 3609 }, { "epoch": 0.12928178774150806, "grad_norm": 2.6938745975494385, "learning_rate": 0.00019487547146966808, "loss": 1.7512, "step": 3610 }, { "epoch": 0.12931759987107633, "grad_norm": 2.0451958179473877, "learning_rate": 0.00019487180538753796, "loss": 1.7794, "step": 3611 }, { "epoch": 0.12935341200064462, "grad_norm": 1.861045241355896, "learning_rate": 0.0001948681380290301, "loss": 1.8672, "step": 3612 }, { "epoch": 0.1293892241302129, "grad_norm": 1.9906060695648193, "learning_rate": 0.0001948644693941938, "loss": 1.4666, "step": 3613 }, { "epoch": 0.1294250362597812, "grad_norm": 2.1408982276916504, "learning_rate": 0.00019486079948307844, "loss": 1.5275, "step": 3614 }, { "epoch": 0.12946084838934946, "grad_norm": 1.689831018447876, "learning_rate": 0.00019485712829573338, "loss": 1.6761, "step": 3615 }, { "epoch": 0.12949666051891776, "grad_norm": 1.6339595317840576, "learning_rate": 0.000194853455832208, "loss": 1.477, "step": 3616 }, { "epoch": 0.12953247264848605, "grad_norm": 1.2168866395950317, "learning_rate": 0.00019484978209255175, "loss": 1.1758, "step": 3617 }, { "epoch": 0.12956828477805432, "grad_norm": 3.002838134765625, "learning_rate": 0.00019484610707681403, "loss": 1.5886, "step": 3618 }, { "epoch": 0.12960409690762262, "grad_norm": 1.7156847715377808, "learning_rate": 0.00019484243078504428, "loss": 1.5088, "step": 3619 }, { "epoch": 0.1296399090371909, "grad_norm": 1.3578027486801147, "learning_rate": 0.00019483875321729194, "loss": 1.5774, "step": 3620 }, { "epoch": 0.12967572116675918, "grad_norm": 1.3243157863616943, "learning_rate": 0.00019483507437360653, "loss": 1.7064, "step": 3621 }, { "epoch": 0.12971153329632745, "grad_norm": 2.185899257659912, "learning_rate": 0.0001948313942540375, "loss": 1.7627, "step": 3622 }, { "epoch": 0.12974734542589575, "grad_norm": 1.9846500158309937, "learning_rate": 0.00019482771285863438, "loss": 1.5378, "step": 3623 }, { "epoch": 0.12978315755546405, "grad_norm": 2.1353330612182617, "learning_rate": 0.00019482403018744674, "loss": 1.6318, "step": 3624 }, { "epoch": 0.12981896968503231, "grad_norm": 1.6027969121932983, "learning_rate": 0.00019482034624052408, "loss": 1.935, "step": 3625 }, { "epoch": 0.1298547818146006, "grad_norm": 1.7356218099594116, "learning_rate": 0.00019481666101791594, "loss": 1.593, "step": 3626 }, { "epoch": 0.12989059394416888, "grad_norm": 1.551705002784729, "learning_rate": 0.00019481297451967195, "loss": 1.7691, "step": 3627 }, { "epoch": 0.12992640607373718, "grad_norm": 2.0163631439208984, "learning_rate": 0.0001948092867458417, "loss": 1.4032, "step": 3628 }, { "epoch": 0.12996221820330545, "grad_norm": 1.6839312314987183, "learning_rate": 0.00019480559769647477, "loss": 1.8452, "step": 3629 }, { "epoch": 0.12999803033287374, "grad_norm": 1.5859227180480957, "learning_rate": 0.00019480190737162083, "loss": 1.5909, "step": 3630 }, { "epoch": 0.13003384246244204, "grad_norm": 1.7990963459014893, "learning_rate": 0.0001947982157713295, "loss": 1.5817, "step": 3631 }, { "epoch": 0.1300696545920103, "grad_norm": 1.4903086423873901, "learning_rate": 0.00019479452289565048, "loss": 1.8922, "step": 3632 }, { "epoch": 0.1301054667215786, "grad_norm": 1.210903525352478, "learning_rate": 0.00019479082874463338, "loss": 1.5365, "step": 3633 }, { "epoch": 0.13014127885114687, "grad_norm": 1.3731626272201538, "learning_rate": 0.000194787133318328, "loss": 1.7997, "step": 3634 }, { "epoch": 0.13017709098071517, "grad_norm": 1.7975584268569946, "learning_rate": 0.000194783436616784, "loss": 1.4367, "step": 3635 }, { "epoch": 0.13021290311028344, "grad_norm": 1.2568280696868896, "learning_rate": 0.00019477973864005113, "loss": 1.8788, "step": 3636 }, { "epoch": 0.13024871523985174, "grad_norm": 1.7492560148239136, "learning_rate": 0.0001947760393881791, "loss": 1.3877, "step": 3637 }, { "epoch": 0.13028452736942003, "grad_norm": 2.4165797233581543, "learning_rate": 0.00019477233886121772, "loss": 1.7056, "step": 3638 }, { "epoch": 0.1303203394989883, "grad_norm": 1.5210332870483398, "learning_rate": 0.00019476863705921677, "loss": 1.8208, "step": 3639 }, { "epoch": 0.1303561516285566, "grad_norm": 1.419649600982666, "learning_rate": 0.00019476493398222608, "loss": 1.4835, "step": 3640 }, { "epoch": 0.13039196375812487, "grad_norm": 2.1755502223968506, "learning_rate": 0.0001947612296302954, "loss": 1.441, "step": 3641 }, { "epoch": 0.13042777588769316, "grad_norm": 1.332960844039917, "learning_rate": 0.00019475752400347464, "loss": 1.6522, "step": 3642 }, { "epoch": 0.13046358801726143, "grad_norm": 1.6385389566421509, "learning_rate": 0.00019475381710181363, "loss": 1.3294, "step": 3643 }, { "epoch": 0.13049940014682973, "grad_norm": 2.0935511589050293, "learning_rate": 0.0001947501089253622, "loss": 1.9683, "step": 3644 }, { "epoch": 0.13053521227639803, "grad_norm": 1.5718079805374146, "learning_rate": 0.00019474639947417028, "loss": 1.4642, "step": 3645 }, { "epoch": 0.1305710244059663, "grad_norm": 1.8546572923660278, "learning_rate": 0.0001947426887482878, "loss": 1.6069, "step": 3646 }, { "epoch": 0.1306068365355346, "grad_norm": 1.3184876441955566, "learning_rate": 0.0001947389767477646, "loss": 1.666, "step": 3647 }, { "epoch": 0.13064264866510286, "grad_norm": 1.41603422164917, "learning_rate": 0.00019473526347265073, "loss": 1.588, "step": 3648 }, { "epoch": 0.13067846079467116, "grad_norm": 1.5340843200683594, "learning_rate": 0.00019473154892299608, "loss": 1.848, "step": 3649 }, { "epoch": 0.13071427292423943, "grad_norm": 2.2189242839813232, "learning_rate": 0.00019472783309885057, "loss": 1.9326, "step": 3650 }, { "epoch": 0.13075008505380772, "grad_norm": 2.0368833541870117, "learning_rate": 0.0001947241160002643, "loss": 1.7715, "step": 3651 }, { "epoch": 0.13078589718337602, "grad_norm": 1.550632357597351, "learning_rate": 0.00019472039762728728, "loss": 1.5666, "step": 3652 }, { "epoch": 0.1308217093129443, "grad_norm": 1.8009685277938843, "learning_rate": 0.00019471667797996944, "loss": 1.5932, "step": 3653 }, { "epoch": 0.1308575214425126, "grad_norm": 1.6331686973571777, "learning_rate": 0.00019471295705836088, "loss": 1.4965, "step": 3654 }, { "epoch": 0.13089333357208086, "grad_norm": 1.6497012376785278, "learning_rate": 0.00019470923486251165, "loss": 1.638, "step": 3655 }, { "epoch": 0.13092914570164915, "grad_norm": 1.526160717010498, "learning_rate": 0.00019470551139247184, "loss": 1.7185, "step": 3656 }, { "epoch": 0.13096495783121742, "grad_norm": 1.3786871433258057, "learning_rate": 0.00019470178664829154, "loss": 1.6264, "step": 3657 }, { "epoch": 0.13100076996078572, "grad_norm": 1.2619601488113403, "learning_rate": 0.00019469806063002082, "loss": 1.5839, "step": 3658 }, { "epoch": 0.13103658209035401, "grad_norm": 2.778254270553589, "learning_rate": 0.0001946943333377099, "loss": 1.5246, "step": 3659 }, { "epoch": 0.13107239421992228, "grad_norm": 1.5448185205459595, "learning_rate": 0.00019469060477140886, "loss": 1.4715, "step": 3660 }, { "epoch": 0.13110820634949058, "grad_norm": 1.5126322507858276, "learning_rate": 0.00019468687493116784, "loss": 1.6236, "step": 3661 }, { "epoch": 0.13114401847905885, "grad_norm": 2.1006526947021484, "learning_rate": 0.00019468314381703708, "loss": 1.8905, "step": 3662 }, { "epoch": 0.13117983060862715, "grad_norm": 1.9070483446121216, "learning_rate": 0.00019467941142906674, "loss": 1.6353, "step": 3663 }, { "epoch": 0.13121564273819541, "grad_norm": 1.40544855594635, "learning_rate": 0.00019467567776730707, "loss": 1.5716, "step": 3664 }, { "epoch": 0.1312514548677637, "grad_norm": 1.3862591981887817, "learning_rate": 0.00019467194283180828, "loss": 1.5309, "step": 3665 }, { "epoch": 0.131287266997332, "grad_norm": 1.2076510190963745, "learning_rate": 0.0001946682066226206, "loss": 1.585, "step": 3666 }, { "epoch": 0.13132307912690028, "grad_norm": 2.244685411453247, "learning_rate": 0.0001946644691397943, "loss": 1.6969, "step": 3667 }, { "epoch": 0.13135889125646857, "grad_norm": 1.1528452634811401, "learning_rate": 0.00019466073038337968, "loss": 1.2363, "step": 3668 }, { "epoch": 0.13139470338603684, "grad_norm": 1.7110793590545654, "learning_rate": 0.00019465699035342706, "loss": 1.6413, "step": 3669 }, { "epoch": 0.13143051551560514, "grad_norm": 1.4075368642807007, "learning_rate": 0.00019465324904998672, "loss": 1.6554, "step": 3670 }, { "epoch": 0.1314663276451734, "grad_norm": 1.4957016706466675, "learning_rate": 0.000194649506473109, "loss": 1.6191, "step": 3671 }, { "epoch": 0.1315021397747417, "grad_norm": 1.4293609857559204, "learning_rate": 0.00019464576262284426, "loss": 1.3673, "step": 3672 }, { "epoch": 0.13153795190431, "grad_norm": 1.4260005950927734, "learning_rate": 0.00019464201749924288, "loss": 1.7648, "step": 3673 }, { "epoch": 0.13157376403387827, "grad_norm": 1.628811240196228, "learning_rate": 0.00019463827110235523, "loss": 1.5441, "step": 3674 }, { "epoch": 0.13160957616344657, "grad_norm": 2.7041733264923096, "learning_rate": 0.00019463452343223173, "loss": 1.7595, "step": 3675 }, { "epoch": 0.13164538829301484, "grad_norm": 4.314607620239258, "learning_rate": 0.00019463077448892278, "loss": 1.4025, "step": 3676 }, { "epoch": 0.13168120042258313, "grad_norm": 1.7181106805801392, "learning_rate": 0.0001946270242724788, "loss": 1.495, "step": 3677 }, { "epoch": 0.1317170125521514, "grad_norm": 1.686544418334961, "learning_rate": 0.0001946232727829503, "loss": 1.727, "step": 3678 }, { "epoch": 0.1317528246817197, "grad_norm": 1.6624183654785156, "learning_rate": 0.00019461952002038771, "loss": 1.7125, "step": 3679 }, { "epoch": 0.131788636811288, "grad_norm": 2.172788381576538, "learning_rate": 0.0001946157659848415, "loss": 2.2068, "step": 3680 }, { "epoch": 0.13182444894085626, "grad_norm": 1.7004787921905518, "learning_rate": 0.00019461201067636226, "loss": 1.8583, "step": 3681 }, { "epoch": 0.13186026107042456, "grad_norm": 1.7210794687271118, "learning_rate": 0.00019460825409500042, "loss": 1.7554, "step": 3682 }, { "epoch": 0.13189607319999283, "grad_norm": 1.6098159551620483, "learning_rate": 0.00019460449624080655, "loss": 1.6517, "step": 3683 }, { "epoch": 0.13193188532956113, "grad_norm": 2.078428030014038, "learning_rate": 0.00019460073711383125, "loss": 1.4448, "step": 3684 }, { "epoch": 0.1319676974591294, "grad_norm": 1.5644986629486084, "learning_rate": 0.00019459697671412503, "loss": 1.7364, "step": 3685 }, { "epoch": 0.1320035095886977, "grad_norm": 1.9476972818374634, "learning_rate": 0.0001945932150417385, "loss": 1.4418, "step": 3686 }, { "epoch": 0.132039321718266, "grad_norm": 2.0070180892944336, "learning_rate": 0.0001945894520967223, "loss": 1.8668, "step": 3687 }, { "epoch": 0.13207513384783426, "grad_norm": 2.056800127029419, "learning_rate": 0.00019458568787912703, "loss": 1.6064, "step": 3688 }, { "epoch": 0.13211094597740256, "grad_norm": 1.6706658601760864, "learning_rate": 0.00019458192238900335, "loss": 1.5147, "step": 3689 }, { "epoch": 0.13214675810697082, "grad_norm": 1.309191346168518, "learning_rate": 0.00019457815562640187, "loss": 1.1176, "step": 3690 }, { "epoch": 0.13218257023653912, "grad_norm": 1.6902896165847778, "learning_rate": 0.00019457438759137334, "loss": 1.4857, "step": 3691 }, { "epoch": 0.1322183823661074, "grad_norm": 1.9160804748535156, "learning_rate": 0.00019457061828396838, "loss": 1.4037, "step": 3692 }, { "epoch": 0.1322541944956757, "grad_norm": 1.564273476600647, "learning_rate": 0.00019456684770423777, "loss": 1.6198, "step": 3693 }, { "epoch": 0.13229000662524398, "grad_norm": 1.7293407917022705, "learning_rate": 0.00019456307585223218, "loss": 1.5622, "step": 3694 }, { "epoch": 0.13232581875481225, "grad_norm": 1.3648868799209595, "learning_rate": 0.00019455930272800243, "loss": 1.507, "step": 3695 }, { "epoch": 0.13236163088438055, "grad_norm": 1.312423825263977, "learning_rate": 0.00019455552833159918, "loss": 1.7455, "step": 3696 }, { "epoch": 0.13239744301394882, "grad_norm": 1.8402488231658936, "learning_rate": 0.00019455175266307328, "loss": 1.6363, "step": 3697 }, { "epoch": 0.13243325514351711, "grad_norm": 1.179580807685852, "learning_rate": 0.00019454797572247552, "loss": 1.6652, "step": 3698 }, { "epoch": 0.13246906727308538, "grad_norm": 1.4366496801376343, "learning_rate": 0.0001945441975098567, "loss": 1.7087, "step": 3699 }, { "epoch": 0.13250487940265368, "grad_norm": 1.816963791847229, "learning_rate": 0.00019454041802526766, "loss": 1.7501, "step": 3700 }, { "epoch": 0.13254069153222198, "grad_norm": 1.6543554067611694, "learning_rate": 0.00019453663726875923, "loss": 1.6763, "step": 3701 }, { "epoch": 0.13257650366179025, "grad_norm": 1.448135256767273, "learning_rate": 0.0001945328552403823, "loss": 1.6681, "step": 3702 }, { "epoch": 0.13261231579135854, "grad_norm": 1.8160815238952637, "learning_rate": 0.00019452907194018776, "loss": 1.5419, "step": 3703 }, { "epoch": 0.1326481279209268, "grad_norm": 1.41280198097229, "learning_rate": 0.00019452528736822646, "loss": 1.7881, "step": 3704 }, { "epoch": 0.1326839400504951, "grad_norm": 1.6581374406814575, "learning_rate": 0.00019452150152454936, "loss": 2.032, "step": 3705 }, { "epoch": 0.13271975218006338, "grad_norm": 1.9039281606674194, "learning_rate": 0.0001945177144092074, "loss": 1.3711, "step": 3706 }, { "epoch": 0.13275556430963167, "grad_norm": 2.1422500610351562, "learning_rate": 0.0001945139260222515, "loss": 1.5075, "step": 3707 }, { "epoch": 0.13279137643919997, "grad_norm": 2.3555521965026855, "learning_rate": 0.00019451013636373262, "loss": 1.6693, "step": 3708 }, { "epoch": 0.13282718856876824, "grad_norm": 1.4542937278747559, "learning_rate": 0.00019450634543370177, "loss": 1.6695, "step": 3709 }, { "epoch": 0.13286300069833654, "grad_norm": 1.687001347541809, "learning_rate": 0.00019450255323220995, "loss": 1.5687, "step": 3710 }, { "epoch": 0.1328988128279048, "grad_norm": 1.497179388999939, "learning_rate": 0.00019449875975930818, "loss": 2.1073, "step": 3711 }, { "epoch": 0.1329346249574731, "grad_norm": 2.0575437545776367, "learning_rate": 0.00019449496501504747, "loss": 1.7413, "step": 3712 }, { "epoch": 0.13297043708704137, "grad_norm": 1.6596516370773315, "learning_rate": 0.0001944911689994789, "loss": 1.4723, "step": 3713 }, { "epoch": 0.13300624921660967, "grad_norm": 1.3024641275405884, "learning_rate": 0.0001944873717126536, "loss": 1.4606, "step": 3714 }, { "epoch": 0.13304206134617794, "grad_norm": 1.9574958086013794, "learning_rate": 0.00019448357315462255, "loss": 1.7971, "step": 3715 }, { "epoch": 0.13307787347574623, "grad_norm": 1.5044560432434082, "learning_rate": 0.00019447977332543687, "loss": 1.4529, "step": 3716 }, { "epoch": 0.13311368560531453, "grad_norm": 2.2105259895324707, "learning_rate": 0.00019447597222514772, "loss": 1.9443, "step": 3717 }, { "epoch": 0.1331494977348828, "grad_norm": 2.5844597816467285, "learning_rate": 0.00019447216985380626, "loss": 1.3201, "step": 3718 }, { "epoch": 0.1331853098644511, "grad_norm": 1.6001721620559692, "learning_rate": 0.0001944683662114636, "loss": 1.6278, "step": 3719 }, { "epoch": 0.13322112199401936, "grad_norm": 1.7248762845993042, "learning_rate": 0.00019446456129817093, "loss": 1.6102, "step": 3720 }, { "epoch": 0.13325693412358766, "grad_norm": 1.187751054763794, "learning_rate": 0.00019446075511397943, "loss": 1.4295, "step": 3721 }, { "epoch": 0.13329274625315593, "grad_norm": 2.0701000690460205, "learning_rate": 0.0001944569476589403, "loss": 1.7675, "step": 3722 }, { "epoch": 0.13332855838272423, "grad_norm": 1.876980185508728, "learning_rate": 0.00019445313893310482, "loss": 1.7526, "step": 3723 }, { "epoch": 0.13336437051229252, "grad_norm": 2.1986751556396484, "learning_rate": 0.00019444932893652417, "loss": 1.3216, "step": 3724 }, { "epoch": 0.1334001826418608, "grad_norm": 2.8376963138580322, "learning_rate": 0.00019444551766924963, "loss": 1.9192, "step": 3725 }, { "epoch": 0.1334359947714291, "grad_norm": 1.2646297216415405, "learning_rate": 0.00019444170513133248, "loss": 1.6767, "step": 3726 }, { "epoch": 0.13347180690099736, "grad_norm": 1.789505124092102, "learning_rate": 0.00019443789132282403, "loss": 1.7196, "step": 3727 }, { "epoch": 0.13350761903056566, "grad_norm": 1.2923089265823364, "learning_rate": 0.0001944340762437755, "loss": 1.2456, "step": 3728 }, { "epoch": 0.13354343116013392, "grad_norm": 2.154646635055542, "learning_rate": 0.00019443025989423834, "loss": 1.5132, "step": 3729 }, { "epoch": 0.13357924328970222, "grad_norm": 1.5187957286834717, "learning_rate": 0.00019442644227426383, "loss": 1.5191, "step": 3730 }, { "epoch": 0.13361505541927052, "grad_norm": 1.3650364875793457, "learning_rate": 0.00019442262338390337, "loss": 1.3481, "step": 3731 }, { "epoch": 0.1336508675488388, "grad_norm": 2.002079963684082, "learning_rate": 0.00019441880322320824, "loss": 1.5562, "step": 3732 }, { "epoch": 0.13368667967840708, "grad_norm": 1.4634652137756348, "learning_rate": 0.00019441498179222997, "loss": 1.8203, "step": 3733 }, { "epoch": 0.13372249180797535, "grad_norm": 2.2729015350341797, "learning_rate": 0.00019441115909101986, "loss": 1.4976, "step": 3734 }, { "epoch": 0.13375830393754365, "grad_norm": 1.93559992313385, "learning_rate": 0.0001944073351196294, "loss": 1.4768, "step": 3735 }, { "epoch": 0.13379411606711192, "grad_norm": 1.4857501983642578, "learning_rate": 0.00019440350987811003, "loss": 1.5598, "step": 3736 }, { "epoch": 0.13382992819668021, "grad_norm": 1.494903564453125, "learning_rate": 0.0001943996833665132, "loss": 1.5704, "step": 3737 }, { "epoch": 0.1338657403262485, "grad_norm": 1.9642722606658936, "learning_rate": 0.0001943958555848904, "loss": 1.855, "step": 3738 }, { "epoch": 0.13390155245581678, "grad_norm": 2.271066904067993, "learning_rate": 0.00019439202653329313, "loss": 1.9326, "step": 3739 }, { "epoch": 0.13393736458538508, "grad_norm": 1.980333924293518, "learning_rate": 0.00019438819621177289, "loss": 1.9477, "step": 3740 }, { "epoch": 0.13397317671495335, "grad_norm": 1.602170467376709, "learning_rate": 0.00019438436462038125, "loss": 1.434, "step": 3741 }, { "epoch": 0.13400898884452164, "grad_norm": 2.0307059288024902, "learning_rate": 0.00019438053175916968, "loss": 1.5559, "step": 3742 }, { "epoch": 0.1340448009740899, "grad_norm": 2.126970052719116, "learning_rate": 0.00019437669762818985, "loss": 1.8426, "step": 3743 }, { "epoch": 0.1340806131036582, "grad_norm": 2.3032383918762207, "learning_rate": 0.00019437286222749326, "loss": 1.7689, "step": 3744 }, { "epoch": 0.1341164252332265, "grad_norm": 1.8578029870986938, "learning_rate": 0.00019436902555713153, "loss": 1.8085, "step": 3745 }, { "epoch": 0.13415223736279477, "grad_norm": 1.2118780612945557, "learning_rate": 0.00019436518761715632, "loss": 1.6056, "step": 3746 }, { "epoch": 0.13418804949236307, "grad_norm": 2.220696210861206, "learning_rate": 0.0001943613484076192, "loss": 1.4916, "step": 3747 }, { "epoch": 0.13422386162193134, "grad_norm": 1.5976279973983765, "learning_rate": 0.0001943575079285719, "loss": 1.6949, "step": 3748 }, { "epoch": 0.13425967375149964, "grad_norm": 1.7551590204238892, "learning_rate": 0.000194353666180066, "loss": 1.5253, "step": 3749 }, { "epoch": 0.1342954858810679, "grad_norm": 1.204624891281128, "learning_rate": 0.00019434982316215326, "loss": 1.5708, "step": 3750 }, { "epoch": 0.1343312980106362, "grad_norm": 1.5681825876235962, "learning_rate": 0.00019434597887488532, "loss": 1.4955, "step": 3751 }, { "epoch": 0.1343671101402045, "grad_norm": 1.9825056791305542, "learning_rate": 0.00019434213331831398, "loss": 1.8734, "step": 3752 }, { "epoch": 0.13440292226977277, "grad_norm": 1.5142807960510254, "learning_rate": 0.00019433828649249087, "loss": 1.4563, "step": 3753 }, { "epoch": 0.13443873439934106, "grad_norm": 1.6803392171859741, "learning_rate": 0.00019433443839746785, "loss": 1.6086, "step": 3754 }, { "epoch": 0.13447454652890933, "grad_norm": 1.265977144241333, "learning_rate": 0.00019433058903329663, "loss": 1.7027, "step": 3755 }, { "epoch": 0.13451035865847763, "grad_norm": 2.002089500427246, "learning_rate": 0.00019432673840002898, "loss": 1.8738, "step": 3756 }, { "epoch": 0.1345461707880459, "grad_norm": 1.433565378189087, "learning_rate": 0.00019432288649771676, "loss": 1.3665, "step": 3757 }, { "epoch": 0.1345819829176142, "grad_norm": 1.1154769659042358, "learning_rate": 0.0001943190333264118, "loss": 1.6499, "step": 3758 }, { "epoch": 0.1346177950471825, "grad_norm": 1.8043376207351685, "learning_rate": 0.0001943151788861659, "loss": 1.6887, "step": 3759 }, { "epoch": 0.13465360717675076, "grad_norm": 1.4192312955856323, "learning_rate": 0.0001943113231770309, "loss": 1.3811, "step": 3760 }, { "epoch": 0.13468941930631906, "grad_norm": 1.6293889284133911, "learning_rate": 0.0001943074661990587, "loss": 1.9191, "step": 3761 }, { "epoch": 0.13472523143588733, "grad_norm": 1.9068212509155273, "learning_rate": 0.0001943036079523012, "loss": 1.6092, "step": 3762 }, { "epoch": 0.13476104356545562, "grad_norm": 1.3996855020523071, "learning_rate": 0.00019429974843681032, "loss": 1.4419, "step": 3763 }, { "epoch": 0.1347968556950239, "grad_norm": 1.6263465881347656, "learning_rate": 0.0001942958876526379, "loss": 1.7413, "step": 3764 }, { "epoch": 0.1348326678245922, "grad_norm": 2.075838327407837, "learning_rate": 0.000194292025599836, "loss": 1.5274, "step": 3765 }, { "epoch": 0.1348684799541605, "grad_norm": 2.207247734069824, "learning_rate": 0.00019428816227845652, "loss": 1.9061, "step": 3766 }, { "epoch": 0.13490429208372876, "grad_norm": 1.719639778137207, "learning_rate": 0.0001942842976885514, "loss": 1.6978, "step": 3767 }, { "epoch": 0.13494010421329705, "grad_norm": 1.641737461090088, "learning_rate": 0.00019428043183017274, "loss": 1.807, "step": 3768 }, { "epoch": 0.13497591634286532, "grad_norm": 4.124390125274658, "learning_rate": 0.00019427656470337242, "loss": 1.825, "step": 3769 }, { "epoch": 0.13501172847243362, "grad_norm": 1.5481641292572021, "learning_rate": 0.00019427269630820258, "loss": 1.5993, "step": 3770 }, { "epoch": 0.1350475406020019, "grad_norm": 1.4922468662261963, "learning_rate": 0.00019426882664471515, "loss": 1.5663, "step": 3771 }, { "epoch": 0.13508335273157018, "grad_norm": 1.7647703886032104, "learning_rate": 0.00019426495571296234, "loss": 1.6529, "step": 3772 }, { "epoch": 0.13511916486113848, "grad_norm": 1.8872132301330566, "learning_rate": 0.00019426108351299607, "loss": 1.8358, "step": 3773 }, { "epoch": 0.13515497699070675, "grad_norm": 1.7756673097610474, "learning_rate": 0.00019425721004486852, "loss": 1.8599, "step": 3774 }, { "epoch": 0.13519078912027505, "grad_norm": 1.4053908586502075, "learning_rate": 0.00019425333530863182, "loss": 1.4622, "step": 3775 }, { "epoch": 0.13522660124984331, "grad_norm": 1.7477492094039917, "learning_rate": 0.00019424945930433807, "loss": 1.8449, "step": 3776 }, { "epoch": 0.1352624133794116, "grad_norm": 1.654231309890747, "learning_rate": 0.0001942455820320394, "loss": 1.4055, "step": 3777 }, { "epoch": 0.13529822550897988, "grad_norm": 1.1583056449890137, "learning_rate": 0.00019424170349178802, "loss": 1.6276, "step": 3778 }, { "epoch": 0.13533403763854818, "grad_norm": 1.3777235746383667, "learning_rate": 0.00019423782368363604, "loss": 1.471, "step": 3779 }, { "epoch": 0.13536984976811647, "grad_norm": 1.4694525003433228, "learning_rate": 0.00019423394260763573, "loss": 1.4806, "step": 3780 }, { "epoch": 0.13540566189768474, "grad_norm": 1.5091458559036255, "learning_rate": 0.00019423006026383926, "loss": 1.7003, "step": 3781 }, { "epoch": 0.13544147402725304, "grad_norm": 1.9628468751907349, "learning_rate": 0.0001942261766522989, "loss": 1.8922, "step": 3782 }, { "epoch": 0.1354772861568213, "grad_norm": 1.4558885097503662, "learning_rate": 0.00019422229177306686, "loss": 1.4158, "step": 3783 }, { "epoch": 0.1355130982863896, "grad_norm": 2.17854380607605, "learning_rate": 0.0001942184056261954, "loss": 1.5416, "step": 3784 }, { "epoch": 0.13554891041595787, "grad_norm": 1.5657025575637817, "learning_rate": 0.00019421451821173685, "loss": 1.768, "step": 3785 }, { "epoch": 0.13558472254552617, "grad_norm": 1.424683928489685, "learning_rate": 0.0001942106295297435, "loss": 1.6767, "step": 3786 }, { "epoch": 0.13562053467509447, "grad_norm": 1.3735336065292358, "learning_rate": 0.00019420673958026762, "loss": 1.6295, "step": 3787 }, { "epoch": 0.13565634680466274, "grad_norm": 1.9221537113189697, "learning_rate": 0.0001942028483633616, "loss": 1.9549, "step": 3788 }, { "epoch": 0.13569215893423103, "grad_norm": 1.564569354057312, "learning_rate": 0.00019419895587907777, "loss": 1.6037, "step": 3789 }, { "epoch": 0.1357279710637993, "grad_norm": 1.569985032081604, "learning_rate": 0.0001941950621274685, "loss": 1.8659, "step": 3790 }, { "epoch": 0.1357637831933676, "grad_norm": 1.9685579538345337, "learning_rate": 0.00019419116710858614, "loss": 1.7621, "step": 3791 }, { "epoch": 0.13579959532293587, "grad_norm": 1.6017086505889893, "learning_rate": 0.00019418727082248316, "loss": 1.4238, "step": 3792 }, { "epoch": 0.13583540745250416, "grad_norm": 1.52402925491333, "learning_rate": 0.00019418337326921193, "loss": 1.4991, "step": 3793 }, { "epoch": 0.13587121958207246, "grad_norm": 1.6972289085388184, "learning_rate": 0.0001941794744488249, "loss": 1.7073, "step": 3794 }, { "epoch": 0.13590703171164073, "grad_norm": 1.2555439472198486, "learning_rate": 0.0001941755743613745, "loss": 1.6406, "step": 3795 }, { "epoch": 0.13594284384120903, "grad_norm": 2.1183104515075684, "learning_rate": 0.00019417167300691328, "loss": 1.6005, "step": 3796 }, { "epoch": 0.1359786559707773, "grad_norm": 1.3762356042861938, "learning_rate": 0.00019416777038549362, "loss": 1.3172, "step": 3797 }, { "epoch": 0.1360144681003456, "grad_norm": 1.4184659719467163, "learning_rate": 0.00019416386649716812, "loss": 1.5459, "step": 3798 }, { "epoch": 0.13605028022991386, "grad_norm": 1.5762983560562134, "learning_rate": 0.0001941599613419892, "loss": 1.3168, "step": 3799 }, { "epoch": 0.13608609235948216, "grad_norm": 2.1073853969573975, "learning_rate": 0.00019415605492000953, "loss": 1.9473, "step": 3800 }, { "epoch": 0.13612190448905045, "grad_norm": 1.605509638786316, "learning_rate": 0.00019415214723128154, "loss": 1.5105, "step": 3801 }, { "epoch": 0.13615771661861872, "grad_norm": 2.1277925968170166, "learning_rate": 0.0001941482382758579, "loss": 1.621, "step": 3802 }, { "epoch": 0.13619352874818702, "grad_norm": 1.3043378591537476, "learning_rate": 0.00019414432805379113, "loss": 1.389, "step": 3803 }, { "epoch": 0.1362293408777553, "grad_norm": 2.173734664916992, "learning_rate": 0.00019414041656513385, "loss": 1.4075, "step": 3804 }, { "epoch": 0.1362651530073236, "grad_norm": 1.3967981338500977, "learning_rate": 0.0001941365038099387, "loss": 1.708, "step": 3805 }, { "epoch": 0.13630096513689186, "grad_norm": 1.6393210887908936, "learning_rate": 0.00019413258978825834, "loss": 1.6744, "step": 3806 }, { "epoch": 0.13633677726646015, "grad_norm": 1.854275107383728, "learning_rate": 0.0001941286745001454, "loss": 1.5661, "step": 3807 }, { "epoch": 0.13637258939602845, "grad_norm": 1.3104915618896484, "learning_rate": 0.00019412475794565256, "loss": 1.3521, "step": 3808 }, { "epoch": 0.13640840152559672, "grad_norm": 1.714754343032837, "learning_rate": 0.00019412084012483249, "loss": 1.662, "step": 3809 }, { "epoch": 0.13644421365516501, "grad_norm": 2.037449359893799, "learning_rate": 0.00019411692103773795, "loss": 1.4034, "step": 3810 }, { "epoch": 0.13648002578473328, "grad_norm": 1.4741345643997192, "learning_rate": 0.00019411300068442167, "loss": 1.7963, "step": 3811 }, { "epoch": 0.13651583791430158, "grad_norm": 1.921120524406433, "learning_rate": 0.0001941090790649363, "loss": 1.6161, "step": 3812 }, { "epoch": 0.13655165004386985, "grad_norm": 1.4448705911636353, "learning_rate": 0.00019410515617933468, "loss": 1.2904, "step": 3813 }, { "epoch": 0.13658746217343815, "grad_norm": 1.8026163578033447, "learning_rate": 0.0001941012320276696, "loss": 1.9121, "step": 3814 }, { "epoch": 0.13662327430300641, "grad_norm": 1.457109808921814, "learning_rate": 0.0001940973066099938, "loss": 1.4424, "step": 3815 }, { "epoch": 0.1366590864325747, "grad_norm": 2.4692842960357666, "learning_rate": 0.00019409337992636015, "loss": 1.5736, "step": 3816 }, { "epoch": 0.136694898562143, "grad_norm": 2.4115750789642334, "learning_rate": 0.0001940894519768214, "loss": 1.4381, "step": 3817 }, { "epoch": 0.13673071069171128, "grad_norm": 1.905058741569519, "learning_rate": 0.00019408552276143045, "loss": 1.8125, "step": 3818 }, { "epoch": 0.13676652282127957, "grad_norm": 1.6240928173065186, "learning_rate": 0.00019408159228024018, "loss": 1.5705, "step": 3819 }, { "epoch": 0.13680233495084784, "grad_norm": 1.6123714447021484, "learning_rate": 0.00019407766053330342, "loss": 1.4053, "step": 3820 }, { "epoch": 0.13683814708041614, "grad_norm": 1.5208178758621216, "learning_rate": 0.00019407372752067308, "loss": 1.3531, "step": 3821 }, { "epoch": 0.1368739592099844, "grad_norm": 1.6093798875808716, "learning_rate": 0.0001940697932424021, "loss": 1.5774, "step": 3822 }, { "epoch": 0.1369097713395527, "grad_norm": 1.2067844867706299, "learning_rate": 0.0001940658576985434, "loss": 1.4985, "step": 3823 }, { "epoch": 0.136945583469121, "grad_norm": 1.312362551689148, "learning_rate": 0.0001940619208891499, "loss": 1.6762, "step": 3824 }, { "epoch": 0.13698139559868927, "grad_norm": 1.438751459121704, "learning_rate": 0.0001940579828142746, "loss": 1.6947, "step": 3825 }, { "epoch": 0.13701720772825757, "grad_norm": 1.2554978132247925, "learning_rate": 0.00019405404347397047, "loss": 1.5329, "step": 3826 }, { "epoch": 0.13705301985782584, "grad_norm": 1.9145128726959229, "learning_rate": 0.0001940501028682905, "loss": 1.7587, "step": 3827 }, { "epoch": 0.13708883198739413, "grad_norm": 1.6156679391860962, "learning_rate": 0.00019404616099728773, "loss": 1.5496, "step": 3828 }, { "epoch": 0.1371246441169624, "grad_norm": 1.4059901237487793, "learning_rate": 0.00019404221786101513, "loss": 1.4649, "step": 3829 }, { "epoch": 0.1371604562465307, "grad_norm": 1.4880584478378296, "learning_rate": 0.0001940382734595258, "loss": 1.5342, "step": 3830 }, { "epoch": 0.137196268376099, "grad_norm": 2.3095836639404297, "learning_rate": 0.00019403432779287286, "loss": 1.7837, "step": 3831 }, { "epoch": 0.13723208050566726, "grad_norm": 1.6353516578674316, "learning_rate": 0.00019403038086110926, "loss": 1.7732, "step": 3832 }, { "epoch": 0.13726789263523556, "grad_norm": 1.5463842153549194, "learning_rate": 0.00019402643266428822, "loss": 1.7355, "step": 3833 }, { "epoch": 0.13730370476480383, "grad_norm": 2.119309186935425, "learning_rate": 0.00019402248320246282, "loss": 2.0842, "step": 3834 }, { "epoch": 0.13733951689437213, "grad_norm": 1.909306287765503, "learning_rate": 0.00019401853247568614, "loss": 1.5683, "step": 3835 }, { "epoch": 0.1373753290239404, "grad_norm": 1.564666509628296, "learning_rate": 0.00019401458048401145, "loss": 1.3599, "step": 3836 }, { "epoch": 0.1374111411535087, "grad_norm": 1.7415882349014282, "learning_rate": 0.0001940106272274918, "loss": 1.4302, "step": 3837 }, { "epoch": 0.137446953283077, "grad_norm": 1.8725720643997192, "learning_rate": 0.00019400667270618046, "loss": 1.3951, "step": 3838 }, { "epoch": 0.13748276541264526, "grad_norm": 1.7075797319412231, "learning_rate": 0.00019400271692013058, "loss": 1.9641, "step": 3839 }, { "epoch": 0.13751857754221355, "grad_norm": 2.7534332275390625, "learning_rate": 0.0001939987598693954, "loss": 1.6384, "step": 3840 }, { "epoch": 0.13755438967178182, "grad_norm": 2.2370193004608154, "learning_rate": 0.00019399480155402813, "loss": 1.57, "step": 3841 }, { "epoch": 0.13759020180135012, "grad_norm": 1.1581882238388062, "learning_rate": 0.0001939908419740821, "loss": 1.664, "step": 3842 }, { "epoch": 0.1376260139309184, "grad_norm": 1.3554637432098389, "learning_rate": 0.0001939868811296105, "loss": 1.2336, "step": 3843 }, { "epoch": 0.1376618260604867, "grad_norm": 1.4079585075378418, "learning_rate": 0.00019398291902066666, "loss": 1.5603, "step": 3844 }, { "epoch": 0.13769763819005498, "grad_norm": 1.8186653852462769, "learning_rate": 0.00019397895564730386, "loss": 1.3144, "step": 3845 }, { "epoch": 0.13773345031962325, "grad_norm": 2.0624194145202637, "learning_rate": 0.00019397499100957542, "loss": 1.974, "step": 3846 }, { "epoch": 0.13776926244919155, "grad_norm": 2.5842959880828857, "learning_rate": 0.00019397102510753473, "loss": 1.5397, "step": 3847 }, { "epoch": 0.13780507457875982, "grad_norm": 1.7426347732543945, "learning_rate": 0.0001939670579412351, "loss": 1.8237, "step": 3848 }, { "epoch": 0.13784088670832811, "grad_norm": 1.6106834411621094, "learning_rate": 0.00019396308951072992, "loss": 1.6794, "step": 3849 }, { "epoch": 0.13787669883789638, "grad_norm": 2.3994948863983154, "learning_rate": 0.00019395911981607254, "loss": 1.4646, "step": 3850 }, { "epoch": 0.13791251096746468, "grad_norm": 1.4603257179260254, "learning_rate": 0.00019395514885731644, "loss": 1.5009, "step": 3851 }, { "epoch": 0.13794832309703298, "grad_norm": 2.004852056503296, "learning_rate": 0.000193951176634515, "loss": 1.6899, "step": 3852 }, { "epoch": 0.13798413522660125, "grad_norm": 1.9660876989364624, "learning_rate": 0.00019394720314772166, "loss": 1.7038, "step": 3853 }, { "epoch": 0.13801994735616954, "grad_norm": 1.419790506362915, "learning_rate": 0.00019394322839698988, "loss": 1.8069, "step": 3854 }, { "epoch": 0.1380557594857378, "grad_norm": 1.6597111225128174, "learning_rate": 0.00019393925238237313, "loss": 1.8452, "step": 3855 }, { "epoch": 0.1380915716153061, "grad_norm": 1.4487488269805908, "learning_rate": 0.00019393527510392494, "loss": 1.7923, "step": 3856 }, { "epoch": 0.13812738374487438, "grad_norm": 1.6511144638061523, "learning_rate": 0.0001939312965616988, "loss": 1.6654, "step": 3857 }, { "epoch": 0.13816319587444267, "grad_norm": 1.8880691528320312, "learning_rate": 0.0001939273167557482, "loss": 1.666, "step": 3858 }, { "epoch": 0.13819900800401097, "grad_norm": 1.333387017250061, "learning_rate": 0.00019392333568612672, "loss": 1.6742, "step": 3859 }, { "epoch": 0.13823482013357924, "grad_norm": 1.4086413383483887, "learning_rate": 0.00019391935335288788, "loss": 1.5357, "step": 3860 }, { "epoch": 0.13827063226314754, "grad_norm": 2.5079047679901123, "learning_rate": 0.00019391536975608533, "loss": 1.6305, "step": 3861 }, { "epoch": 0.1383064443927158, "grad_norm": 1.7378469705581665, "learning_rate": 0.0001939113848957726, "loss": 1.2147, "step": 3862 }, { "epoch": 0.1383422565222841, "grad_norm": 2.0225167274475098, "learning_rate": 0.00019390739877200335, "loss": 1.5807, "step": 3863 }, { "epoch": 0.13837806865185237, "grad_norm": 1.6210397481918335, "learning_rate": 0.00019390341138483117, "loss": 1.6289, "step": 3864 }, { "epoch": 0.13841388078142067, "grad_norm": 1.4240684509277344, "learning_rate": 0.0001938994227343097, "loss": 1.7307, "step": 3865 }, { "epoch": 0.13844969291098896, "grad_norm": 1.4018386602401733, "learning_rate": 0.00019389543282049263, "loss": 1.4844, "step": 3866 }, { "epoch": 0.13848550504055723, "grad_norm": 1.4546011686325073, "learning_rate": 0.0001938914416434336, "loss": 1.6839, "step": 3867 }, { "epoch": 0.13852131717012553, "grad_norm": 1.5144541263580322, "learning_rate": 0.00019388744920318638, "loss": 1.6554, "step": 3868 }, { "epoch": 0.1385571292996938, "grad_norm": 1.9731286764144897, "learning_rate": 0.00019388345549980462, "loss": 1.8439, "step": 3869 }, { "epoch": 0.1385929414292621, "grad_norm": 1.4650535583496094, "learning_rate": 0.00019387946053334206, "loss": 1.836, "step": 3870 }, { "epoch": 0.13862875355883036, "grad_norm": 2.14497447013855, "learning_rate": 0.00019387546430385246, "loss": 1.5419, "step": 3871 }, { "epoch": 0.13866456568839866, "grad_norm": 1.929789423942566, "learning_rate": 0.00019387146681138957, "loss": 1.6334, "step": 3872 }, { "epoch": 0.13870037781796696, "grad_norm": 1.5620695352554321, "learning_rate": 0.00019386746805600717, "loss": 1.5802, "step": 3873 }, { "epoch": 0.13873618994753523, "grad_norm": 1.579016089439392, "learning_rate": 0.00019386346803775909, "loss": 1.5735, "step": 3874 }, { "epoch": 0.13877200207710352, "grad_norm": 2.0494682788848877, "learning_rate": 0.00019385946675669913, "loss": 1.6366, "step": 3875 }, { "epoch": 0.1388078142066718, "grad_norm": 1.5577737092971802, "learning_rate": 0.0001938554642128811, "loss": 1.5314, "step": 3876 }, { "epoch": 0.1388436263362401, "grad_norm": 1.546035647392273, "learning_rate": 0.00019385146040635886, "loss": 1.6867, "step": 3877 }, { "epoch": 0.13887943846580836, "grad_norm": 1.4088118076324463, "learning_rate": 0.00019384745533718628, "loss": 1.2657, "step": 3878 }, { "epoch": 0.13891525059537665, "grad_norm": 2.4216670989990234, "learning_rate": 0.00019384344900541723, "loss": 1.448, "step": 3879 }, { "epoch": 0.13895106272494495, "grad_norm": 2.032590627670288, "learning_rate": 0.00019383944141110565, "loss": 1.607, "step": 3880 }, { "epoch": 0.13898687485451322, "grad_norm": 1.4707528352737427, "learning_rate": 0.00019383543255430542, "loss": 1.3947, "step": 3881 }, { "epoch": 0.13902268698408152, "grad_norm": 1.8237826824188232, "learning_rate": 0.00019383142243507048, "loss": 2.0583, "step": 3882 }, { "epoch": 0.13905849911364979, "grad_norm": 1.9654194116592407, "learning_rate": 0.00019382741105345482, "loss": 1.6511, "step": 3883 }, { "epoch": 0.13909431124321808, "grad_norm": 1.2782480716705322, "learning_rate": 0.0001938233984095123, "loss": 1.8372, "step": 3884 }, { "epoch": 0.13913012337278635, "grad_norm": 1.2892425060272217, "learning_rate": 0.00019381938450329704, "loss": 1.8873, "step": 3885 }, { "epoch": 0.13916593550235465, "grad_norm": 1.9381794929504395, "learning_rate": 0.00019381536933486295, "loss": 1.8379, "step": 3886 }, { "epoch": 0.13920174763192295, "grad_norm": 1.7396278381347656, "learning_rate": 0.0001938113529042641, "loss": 1.7356, "step": 3887 }, { "epoch": 0.13923755976149121, "grad_norm": 1.13014817237854, "learning_rate": 0.0001938073352115545, "loss": 1.4796, "step": 3888 }, { "epoch": 0.1392733718910595, "grad_norm": 1.9644767045974731, "learning_rate": 0.00019380331625678821, "loss": 1.7187, "step": 3889 }, { "epoch": 0.13930918402062778, "grad_norm": 1.3784605264663696, "learning_rate": 0.00019379929604001927, "loss": 1.5731, "step": 3890 }, { "epoch": 0.13934499615019608, "grad_norm": 2.058908700942993, "learning_rate": 0.00019379527456130183, "loss": 1.7203, "step": 3891 }, { "epoch": 0.13938080827976435, "grad_norm": 1.9625152349472046, "learning_rate": 0.00019379125182068994, "loss": 1.4671, "step": 3892 }, { "epoch": 0.13941662040933264, "grad_norm": 2.0643458366394043, "learning_rate": 0.00019378722781823772, "loss": 1.6485, "step": 3893 }, { "epoch": 0.13945243253890094, "grad_norm": 1.778398871421814, "learning_rate": 0.00019378320255399934, "loss": 1.6492, "step": 3894 }, { "epoch": 0.1394882446684692, "grad_norm": 1.6145321130752563, "learning_rate": 0.00019377917602802897, "loss": 1.6214, "step": 3895 }, { "epoch": 0.1395240567980375, "grad_norm": 2.243457794189453, "learning_rate": 0.00019377514824038073, "loss": 1.9855, "step": 3896 }, { "epoch": 0.13955986892760577, "grad_norm": 2.51615047454834, "learning_rate": 0.00019377111919110883, "loss": 2.0496, "step": 3897 }, { "epoch": 0.13959568105717407, "grad_norm": 1.4384043216705322, "learning_rate": 0.00019376708888026747, "loss": 1.7474, "step": 3898 }, { "epoch": 0.13963149318674234, "grad_norm": 2.0922720432281494, "learning_rate": 0.0001937630573079109, "loss": 1.5602, "step": 3899 }, { "epoch": 0.13966730531631064, "grad_norm": 1.4650601148605347, "learning_rate": 0.0001937590244740933, "loss": 1.3567, "step": 3900 }, { "epoch": 0.13970311744587893, "grad_norm": 1.8897500038146973, "learning_rate": 0.000193754990378869, "loss": 1.6301, "step": 3901 }, { "epoch": 0.1397389295754472, "grad_norm": 1.7540127038955688, "learning_rate": 0.00019375095502229223, "loss": 1.4029, "step": 3902 }, { "epoch": 0.1397747417050155, "grad_norm": 1.8361109495162964, "learning_rate": 0.0001937469184044173, "loss": 1.6593, "step": 3903 }, { "epoch": 0.13981055383458377, "grad_norm": 1.9301716089248657, "learning_rate": 0.0001937428805252985, "loss": 1.3712, "step": 3904 }, { "epoch": 0.13984636596415206, "grad_norm": 1.518929123878479, "learning_rate": 0.00019373884138499018, "loss": 1.5331, "step": 3905 }, { "epoch": 0.13988217809372033, "grad_norm": 1.6392289400100708, "learning_rate": 0.00019373480098354665, "loss": 1.5763, "step": 3906 }, { "epoch": 0.13991799022328863, "grad_norm": 1.7571324110031128, "learning_rate": 0.00019373075932102227, "loss": 1.6839, "step": 3907 }, { "epoch": 0.13995380235285693, "grad_norm": 1.5957682132720947, "learning_rate": 0.00019372671639747145, "loss": 1.7607, "step": 3908 }, { "epoch": 0.1399896144824252, "grad_norm": 1.305310845375061, "learning_rate": 0.00019372267221294854, "loss": 1.679, "step": 3909 }, { "epoch": 0.1400254266119935, "grad_norm": 1.584241271018982, "learning_rate": 0.00019371862676750796, "loss": 1.6078, "step": 3910 }, { "epoch": 0.14006123874156176, "grad_norm": 1.5107513666152954, "learning_rate": 0.00019371458006120417, "loss": 1.7136, "step": 3911 }, { "epoch": 0.14009705087113006, "grad_norm": 1.7936160564422607, "learning_rate": 0.00019371053209409157, "loss": 1.5745, "step": 3912 }, { "epoch": 0.14013286300069833, "grad_norm": 2.0149476528167725, "learning_rate": 0.00019370648286622466, "loss": 1.3959, "step": 3913 }, { "epoch": 0.14016867513026662, "grad_norm": 1.791279673576355, "learning_rate": 0.00019370243237765787, "loss": 1.2805, "step": 3914 }, { "epoch": 0.1402044872598349, "grad_norm": 1.2142484188079834, "learning_rate": 0.00019369838062844577, "loss": 1.6181, "step": 3915 }, { "epoch": 0.1402402993894032, "grad_norm": 1.787173867225647, "learning_rate": 0.00019369432761864278, "loss": 1.5593, "step": 3916 }, { "epoch": 0.14027611151897149, "grad_norm": 1.2792410850524902, "learning_rate": 0.00019369027334830346, "loss": 1.5291, "step": 3917 }, { "epoch": 0.14031192364853975, "grad_norm": 1.5265432596206665, "learning_rate": 0.00019368621781748238, "loss": 1.7252, "step": 3918 }, { "epoch": 0.14034773577810805, "grad_norm": 1.5280210971832275, "learning_rate": 0.0001936821610262341, "loss": 1.6944, "step": 3919 }, { "epoch": 0.14038354790767632, "grad_norm": 1.6217960119247437, "learning_rate": 0.00019367810297461313, "loss": 1.4182, "step": 3920 }, { "epoch": 0.14041936003724462, "grad_norm": 1.484212875366211, "learning_rate": 0.00019367404366267416, "loss": 1.4251, "step": 3921 }, { "epoch": 0.14045517216681289, "grad_norm": 1.711181402206421, "learning_rate": 0.0001936699830904718, "loss": 1.7121, "step": 3922 }, { "epoch": 0.14049098429638118, "grad_norm": 1.52628755569458, "learning_rate": 0.00019366592125806057, "loss": 1.5609, "step": 3923 }, { "epoch": 0.14052679642594948, "grad_norm": 1.480896234512329, "learning_rate": 0.00019366185816549524, "loss": 1.4094, "step": 3924 }, { "epoch": 0.14056260855551775, "grad_norm": 1.4317753314971924, "learning_rate": 0.0001936577938128304, "loss": 1.6869, "step": 3925 }, { "epoch": 0.14059842068508605, "grad_norm": 2.088813543319702, "learning_rate": 0.00019365372820012077, "loss": 1.4735, "step": 3926 }, { "epoch": 0.14063423281465431, "grad_norm": 1.6882331371307373, "learning_rate": 0.00019364966132742102, "loss": 1.7869, "step": 3927 }, { "epoch": 0.1406700449442226, "grad_norm": 2.3925135135650635, "learning_rate": 0.00019364559319478585, "loss": 1.5204, "step": 3928 }, { "epoch": 0.14070585707379088, "grad_norm": 1.3298742771148682, "learning_rate": 0.00019364152380227007, "loss": 1.3791, "step": 3929 }, { "epoch": 0.14074166920335918, "grad_norm": 2.0857841968536377, "learning_rate": 0.00019363745314992836, "loss": 1.6491, "step": 3930 }, { "epoch": 0.14077748133292747, "grad_norm": 1.499003291130066, "learning_rate": 0.00019363338123781548, "loss": 1.5969, "step": 3931 }, { "epoch": 0.14081329346249574, "grad_norm": 1.8543964624404907, "learning_rate": 0.00019362930806598625, "loss": 1.5286, "step": 3932 }, { "epoch": 0.14084910559206404, "grad_norm": 1.2838963270187378, "learning_rate": 0.00019362523363449546, "loss": 1.3795, "step": 3933 }, { "epoch": 0.1408849177216323, "grad_norm": 1.6175882816314697, "learning_rate": 0.0001936211579433979, "loss": 1.5956, "step": 3934 }, { "epoch": 0.1409207298512006, "grad_norm": 1.3847663402557373, "learning_rate": 0.00019361708099274844, "loss": 1.7353, "step": 3935 }, { "epoch": 0.14095654198076887, "grad_norm": 1.8384443521499634, "learning_rate": 0.00019361300278260193, "loss": 2.0406, "step": 3936 }, { "epoch": 0.14099235411033717, "grad_norm": 1.939363718032837, "learning_rate": 0.00019360892331301316, "loss": 1.7603, "step": 3937 }, { "epoch": 0.14102816623990547, "grad_norm": 1.2415847778320312, "learning_rate": 0.00019360484258403713, "loss": 1.7003, "step": 3938 }, { "epoch": 0.14106397836947374, "grad_norm": 3.8942644596099854, "learning_rate": 0.00019360076059572867, "loss": 1.3907, "step": 3939 }, { "epoch": 0.14109979049904203, "grad_norm": 1.311605453491211, "learning_rate": 0.0001935966773481427, "loss": 1.7552, "step": 3940 }, { "epoch": 0.1411356026286103, "grad_norm": 1.316116452217102, "learning_rate": 0.00019359259284133418, "loss": 1.4707, "step": 3941 }, { "epoch": 0.1411714147581786, "grad_norm": 1.855420470237732, "learning_rate": 0.00019358850707535804, "loss": 1.445, "step": 3942 }, { "epoch": 0.14120722688774687, "grad_norm": 1.1825437545776367, "learning_rate": 0.00019358442005026926, "loss": 1.3708, "step": 3943 }, { "epoch": 0.14124303901731516, "grad_norm": 1.6518439054489136, "learning_rate": 0.0001935803317661228, "loss": 1.6204, "step": 3944 }, { "epoch": 0.14127885114688346, "grad_norm": 1.6733884811401367, "learning_rate": 0.0001935762422229737, "loss": 1.7793, "step": 3945 }, { "epoch": 0.14131466327645173, "grad_norm": 1.8255501985549927, "learning_rate": 0.00019357215142087699, "loss": 1.3537, "step": 3946 }, { "epoch": 0.14135047540602003, "grad_norm": 1.6905624866485596, "learning_rate": 0.0001935680593598877, "loss": 1.4878, "step": 3947 }, { "epoch": 0.1413862875355883, "grad_norm": 1.3700306415557861, "learning_rate": 0.00019356396604006083, "loss": 1.3288, "step": 3948 }, { "epoch": 0.1414220996651566, "grad_norm": 1.2963467836380005, "learning_rate": 0.00019355987146145147, "loss": 1.5802, "step": 3949 }, { "epoch": 0.14145791179472486, "grad_norm": 1.2930432558059692, "learning_rate": 0.00019355577562411473, "loss": 1.592, "step": 3950 }, { "epoch": 0.14149372392429316, "grad_norm": 1.4524013996124268, "learning_rate": 0.00019355167852810575, "loss": 1.6285, "step": 3951 }, { "epoch": 0.14152953605386145, "grad_norm": 1.399037480354309, "learning_rate": 0.00019354758017347957, "loss": 1.5855, "step": 3952 }, { "epoch": 0.14156534818342972, "grad_norm": 1.4224966764450073, "learning_rate": 0.00019354348056029136, "loss": 1.9141, "step": 3953 }, { "epoch": 0.14160116031299802, "grad_norm": 1.995407223701477, "learning_rate": 0.0001935393796885963, "loss": 1.6105, "step": 3954 }, { "epoch": 0.1416369724425663, "grad_norm": 1.7793134450912476, "learning_rate": 0.00019353527755844953, "loss": 1.6095, "step": 3955 }, { "epoch": 0.14167278457213459, "grad_norm": 1.9654982089996338, "learning_rate": 0.00019353117416990627, "loss": 1.6372, "step": 3956 }, { "epoch": 0.14170859670170285, "grad_norm": 1.6730225086212158, "learning_rate": 0.0001935270695230217, "loss": 1.3259, "step": 3957 }, { "epoch": 0.14174440883127115, "grad_norm": 1.6628282070159912, "learning_rate": 0.00019352296361785105, "loss": 1.6113, "step": 3958 }, { "epoch": 0.14178022096083945, "grad_norm": 1.5034743547439575, "learning_rate": 0.00019351885645444957, "loss": 1.5158, "step": 3959 }, { "epoch": 0.14181603309040772, "grad_norm": 2.691425323486328, "learning_rate": 0.0001935147480328725, "loss": 1.6924, "step": 3960 }, { "epoch": 0.141851845219976, "grad_norm": 1.750555157661438, "learning_rate": 0.0001935106383531751, "loss": 1.6288, "step": 3961 }, { "epoch": 0.14188765734954428, "grad_norm": 2.0469725131988525, "learning_rate": 0.00019350652741541272, "loss": 1.7488, "step": 3962 }, { "epoch": 0.14192346947911258, "grad_norm": 1.605979323387146, "learning_rate": 0.00019350241521964062, "loss": 1.6378, "step": 3963 }, { "epoch": 0.14195928160868085, "grad_norm": 1.7067230939865112, "learning_rate": 0.00019349830176591408, "loss": 1.778, "step": 3964 }, { "epoch": 0.14199509373824915, "grad_norm": 2.101348400115967, "learning_rate": 0.00019349418705428854, "loss": 1.4173, "step": 3965 }, { "epoch": 0.14203090586781744, "grad_norm": 1.571287989616394, "learning_rate": 0.0001934900710848193, "loss": 1.6081, "step": 3966 }, { "epoch": 0.1420667179973857, "grad_norm": 1.2210115194320679, "learning_rate": 0.00019348595385756178, "loss": 1.1689, "step": 3967 }, { "epoch": 0.142102530126954, "grad_norm": 1.7547118663787842, "learning_rate": 0.00019348183537257131, "loss": 1.652, "step": 3968 }, { "epoch": 0.14213834225652228, "grad_norm": 2.7394344806671143, "learning_rate": 0.00019347771562990332, "loss": 1.8632, "step": 3969 }, { "epoch": 0.14217415438609057, "grad_norm": 1.5795948505401611, "learning_rate": 0.00019347359462961326, "loss": 1.5562, "step": 3970 }, { "epoch": 0.14220996651565884, "grad_norm": 1.3725916147232056, "learning_rate": 0.00019346947237175655, "loss": 1.4961, "step": 3971 }, { "epoch": 0.14224577864522714, "grad_norm": 1.5435025691986084, "learning_rate": 0.00019346534885638866, "loss": 1.877, "step": 3972 }, { "epoch": 0.14228159077479544, "grad_norm": 1.6002931594848633, "learning_rate": 0.00019346122408356507, "loss": 1.6769, "step": 3973 }, { "epoch": 0.1423174029043637, "grad_norm": 1.736573576927185, "learning_rate": 0.00019345709805334123, "loss": 1.5778, "step": 3974 }, { "epoch": 0.142353215033932, "grad_norm": 1.5255703926086426, "learning_rate": 0.00019345297076577272, "loss": 1.8338, "step": 3975 }, { "epoch": 0.14238902716350027, "grad_norm": 1.9835999011993408, "learning_rate": 0.00019344884222091503, "loss": 1.6607, "step": 3976 }, { "epoch": 0.14242483929306857, "grad_norm": 1.5903600454330444, "learning_rate": 0.00019344471241882372, "loss": 1.5658, "step": 3977 }, { "epoch": 0.14246065142263684, "grad_norm": 1.982418179512024, "learning_rate": 0.0001934405813595543, "loss": 1.7213, "step": 3978 }, { "epoch": 0.14249646355220513, "grad_norm": 1.5569443702697754, "learning_rate": 0.00019343644904316242, "loss": 1.6512, "step": 3979 }, { "epoch": 0.14253227568177343, "grad_norm": 1.252939224243164, "learning_rate": 0.0001934323154697036, "loss": 1.307, "step": 3980 }, { "epoch": 0.1425680878113417, "grad_norm": 1.7318528890609741, "learning_rate": 0.00019342818063923357, "loss": 1.6283, "step": 3981 }, { "epoch": 0.14260389994091, "grad_norm": 1.3830797672271729, "learning_rate": 0.00019342404455180784, "loss": 1.5416, "step": 3982 }, { "epoch": 0.14263971207047826, "grad_norm": 1.8082152605056763, "learning_rate": 0.00019341990720748208, "loss": 1.4456, "step": 3983 }, { "epoch": 0.14267552420004656, "grad_norm": 1.605903148651123, "learning_rate": 0.000193415768606312, "loss": 1.9392, "step": 3984 }, { "epoch": 0.14271133632961483, "grad_norm": 1.7564297914505005, "learning_rate": 0.00019341162874835326, "loss": 1.9917, "step": 3985 }, { "epoch": 0.14274714845918313, "grad_norm": 1.354068398475647, "learning_rate": 0.00019340748763366152, "loss": 1.6126, "step": 3986 }, { "epoch": 0.14278296058875142, "grad_norm": 1.6402426958084106, "learning_rate": 0.00019340334526229253, "loss": 1.7782, "step": 3987 }, { "epoch": 0.1428187727183197, "grad_norm": 1.6833535432815552, "learning_rate": 0.00019339920163430202, "loss": 1.6701, "step": 3988 }, { "epoch": 0.142854584847888, "grad_norm": 2.3586952686309814, "learning_rate": 0.0001933950567497457, "loss": 1.6699, "step": 3989 }, { "epoch": 0.14289039697745626, "grad_norm": 2.0771920680999756, "learning_rate": 0.0001933909106086794, "loss": 1.6299, "step": 3990 }, { "epoch": 0.14292620910702455, "grad_norm": 1.3921229839324951, "learning_rate": 0.00019338676321115883, "loss": 1.4079, "step": 3991 }, { "epoch": 0.14296202123659282, "grad_norm": 1.5394184589385986, "learning_rate": 0.00019338261455723984, "loss": 1.3872, "step": 3992 }, { "epoch": 0.14299783336616112, "grad_norm": 1.6472933292388916, "learning_rate": 0.00019337846464697825, "loss": 1.3228, "step": 3993 }, { "epoch": 0.14303364549572942, "grad_norm": 1.6850037574768066, "learning_rate": 0.00019337431348042983, "loss": 1.5189, "step": 3994 }, { "epoch": 0.14306945762529769, "grad_norm": 3.3073441982269287, "learning_rate": 0.00019337016105765048, "loss": 1.5217, "step": 3995 }, { "epoch": 0.14310526975486598, "grad_norm": 1.2370121479034424, "learning_rate": 0.00019336600737869603, "loss": 1.89, "step": 3996 }, { "epoch": 0.14314108188443425, "grad_norm": 1.7099319696426392, "learning_rate": 0.00019336185244362244, "loss": 1.5129, "step": 3997 }, { "epoch": 0.14317689401400255, "grad_norm": 1.7119805812835693, "learning_rate": 0.0001933576962524855, "loss": 1.3498, "step": 3998 }, { "epoch": 0.14321270614357082, "grad_norm": 1.3768866062164307, "learning_rate": 0.0001933535388053412, "loss": 1.4905, "step": 3999 }, { "epoch": 0.1432485182731391, "grad_norm": 2.029284715652466, "learning_rate": 0.00019334938010224546, "loss": 1.8039, "step": 4000 }, { "epoch": 0.1432843304027074, "grad_norm": 1.4423243999481201, "learning_rate": 0.0001933452201432542, "loss": 1.7993, "step": 4001 }, { "epoch": 0.14332014253227568, "grad_norm": 1.3633579015731812, "learning_rate": 0.00019334105892842342, "loss": 1.5531, "step": 4002 }, { "epoch": 0.14335595466184398, "grad_norm": 1.5188398361206055, "learning_rate": 0.00019333689645780912, "loss": 1.7493, "step": 4003 }, { "epoch": 0.14339176679141225, "grad_norm": 1.5770578384399414, "learning_rate": 0.00019333273273146721, "loss": 1.5965, "step": 4004 }, { "epoch": 0.14342757892098054, "grad_norm": 1.7399095296859741, "learning_rate": 0.00019332856774945383, "loss": 1.6626, "step": 4005 }, { "epoch": 0.1434633910505488, "grad_norm": 1.923545241355896, "learning_rate": 0.00019332440151182493, "loss": 1.245, "step": 4006 }, { "epoch": 0.1434992031801171, "grad_norm": 1.3939712047576904, "learning_rate": 0.00019332023401863658, "loss": 1.3326, "step": 4007 }, { "epoch": 0.1435350153096854, "grad_norm": 1.7758983373641968, "learning_rate": 0.00019331606526994488, "loss": 1.603, "step": 4008 }, { "epoch": 0.14357082743925367, "grad_norm": 1.7037540674209595, "learning_rate": 0.0001933118952658059, "loss": 1.5085, "step": 4009 }, { "epoch": 0.14360663956882197, "grad_norm": 1.764907956123352, "learning_rate": 0.00019330772400627573, "loss": 1.1214, "step": 4010 }, { "epoch": 0.14364245169839024, "grad_norm": 1.9461796283721924, "learning_rate": 0.00019330355149141046, "loss": 1.5362, "step": 4011 }, { "epoch": 0.14367826382795854, "grad_norm": 2.126535415649414, "learning_rate": 0.00019329937772126626, "loss": 1.9574, "step": 4012 }, { "epoch": 0.1437140759575268, "grad_norm": 1.2146317958831787, "learning_rate": 0.0001932952026958993, "loss": 1.3589, "step": 4013 }, { "epoch": 0.1437498880870951, "grad_norm": 1.6272441148757935, "learning_rate": 0.00019329102641536575, "loss": 1.4191, "step": 4014 }, { "epoch": 0.14378570021666337, "grad_norm": 1.698939561843872, "learning_rate": 0.00019328684887972173, "loss": 1.6507, "step": 4015 }, { "epoch": 0.14382151234623167, "grad_norm": 2.0913240909576416, "learning_rate": 0.00019328267008902352, "loss": 1.5051, "step": 4016 }, { "epoch": 0.14385732447579996, "grad_norm": 1.6525379419326782, "learning_rate": 0.00019327849004332728, "loss": 1.7334, "step": 4017 }, { "epoch": 0.14389313660536823, "grad_norm": 1.996968388557434, "learning_rate": 0.0001932743087426893, "loss": 1.6867, "step": 4018 }, { "epoch": 0.14392894873493653, "grad_norm": 1.8276373147964478, "learning_rate": 0.00019327012618716583, "loss": 1.6823, "step": 4019 }, { "epoch": 0.1439647608645048, "grad_norm": 1.7384955883026123, "learning_rate": 0.00019326594237681311, "loss": 1.9046, "step": 4020 }, { "epoch": 0.1440005729940731, "grad_norm": 1.7132830619812012, "learning_rate": 0.00019326175731168742, "loss": 1.8533, "step": 4021 }, { "epoch": 0.14403638512364136, "grad_norm": 1.467646837234497, "learning_rate": 0.00019325757099184507, "loss": 1.2429, "step": 4022 }, { "epoch": 0.14407219725320966, "grad_norm": 1.7912276983261108, "learning_rate": 0.00019325338341734245, "loss": 1.685, "step": 4023 }, { "epoch": 0.14410800938277796, "grad_norm": 1.2599592208862305, "learning_rate": 0.00019324919458823582, "loss": 1.5783, "step": 4024 }, { "epoch": 0.14414382151234623, "grad_norm": 2.230529308319092, "learning_rate": 0.00019324500450458153, "loss": 1.7649, "step": 4025 }, { "epoch": 0.14417963364191452, "grad_norm": 1.7477943897247314, "learning_rate": 0.000193240813166436, "loss": 1.5066, "step": 4026 }, { "epoch": 0.1442154457714828, "grad_norm": 2.1602365970611572, "learning_rate": 0.0001932366205738556, "loss": 1.3825, "step": 4027 }, { "epoch": 0.1442512579010511, "grad_norm": 1.7186968326568604, "learning_rate": 0.00019323242672689676, "loss": 1.6016, "step": 4028 }, { "epoch": 0.14428707003061936, "grad_norm": 1.2913117408752441, "learning_rate": 0.00019322823162561586, "loss": 1.6547, "step": 4029 }, { "epoch": 0.14432288216018765, "grad_norm": 2.078582525253296, "learning_rate": 0.00019322403527006937, "loss": 1.5363, "step": 4030 }, { "epoch": 0.14435869428975595, "grad_norm": 1.4359171390533447, "learning_rate": 0.00019321983766031373, "loss": 1.8056, "step": 4031 }, { "epoch": 0.14439450641932422, "grad_norm": 1.497952938079834, "learning_rate": 0.00019321563879640542, "loss": 1.421, "step": 4032 }, { "epoch": 0.14443031854889252, "grad_norm": 1.6174252033233643, "learning_rate": 0.00019321143867840091, "loss": 1.5336, "step": 4033 }, { "epoch": 0.14446613067846079, "grad_norm": 1.4377790689468384, "learning_rate": 0.00019320723730635676, "loss": 1.8566, "step": 4034 }, { "epoch": 0.14450194280802908, "grad_norm": 2.628340482711792, "learning_rate": 0.00019320303468032944, "loss": 1.6271, "step": 4035 }, { "epoch": 0.14453775493759735, "grad_norm": 1.5452464818954468, "learning_rate": 0.00019319883080037552, "loss": 1.7898, "step": 4036 }, { "epoch": 0.14457356706716565, "grad_norm": 1.258744478225708, "learning_rate": 0.00019319462566655155, "loss": 1.727, "step": 4037 }, { "epoch": 0.14460937919673394, "grad_norm": 1.481390357017517, "learning_rate": 0.0001931904192789141, "loss": 1.6695, "step": 4038 }, { "epoch": 0.1446451913263022, "grad_norm": 1.921749234199524, "learning_rate": 0.00019318621163751974, "loss": 1.4772, "step": 4039 }, { "epoch": 0.1446810034558705, "grad_norm": 1.3307794332504272, "learning_rate": 0.00019318200274242515, "loss": 1.7675, "step": 4040 }, { "epoch": 0.14471681558543878, "grad_norm": 1.9500494003295898, "learning_rate": 0.0001931777925936869, "loss": 1.2512, "step": 4041 }, { "epoch": 0.14475262771500708, "grad_norm": 1.4262527227401733, "learning_rate": 0.00019317358119136163, "loss": 1.5668, "step": 4042 }, { "epoch": 0.14478843984457535, "grad_norm": 2.077671766281128, "learning_rate": 0.000193169368535506, "loss": 1.6646, "step": 4043 }, { "epoch": 0.14482425197414364, "grad_norm": 2.521920680999756, "learning_rate": 0.00019316515462617672, "loss": 1.9527, "step": 4044 }, { "epoch": 0.14486006410371194, "grad_norm": 2.3683269023895264, "learning_rate": 0.00019316093946343044, "loss": 1.3807, "step": 4045 }, { "epoch": 0.1448958762332802, "grad_norm": 2.656454086303711, "learning_rate": 0.00019315672304732388, "loss": 1.38, "step": 4046 }, { "epoch": 0.1449316883628485, "grad_norm": 2.149718999862671, "learning_rate": 0.0001931525053779138, "loss": 1.4746, "step": 4047 }, { "epoch": 0.14496750049241677, "grad_norm": 1.8854492902755737, "learning_rate": 0.00019314828645525692, "loss": 1.5098, "step": 4048 }, { "epoch": 0.14500331262198507, "grad_norm": 1.668682336807251, "learning_rate": 0.00019314406627940996, "loss": 1.3973, "step": 4049 }, { "epoch": 0.14503912475155334, "grad_norm": 2.1312713623046875, "learning_rate": 0.00019313984485042976, "loss": 1.6263, "step": 4050 }, { "epoch": 0.14507493688112164, "grad_norm": 1.5961402654647827, "learning_rate": 0.0001931356221683731, "loss": 1.6585, "step": 4051 }, { "epoch": 0.14511074901068993, "grad_norm": 1.9257919788360596, "learning_rate": 0.00019313139823329677, "loss": 1.5084, "step": 4052 }, { "epoch": 0.1451465611402582, "grad_norm": 1.2645249366760254, "learning_rate": 0.00019312717304525762, "loss": 1.4746, "step": 4053 }, { "epoch": 0.1451823732698265, "grad_norm": 2.637812614440918, "learning_rate": 0.00019312294660431246, "loss": 2.0501, "step": 4054 }, { "epoch": 0.14521818539939477, "grad_norm": 1.7504860162734985, "learning_rate": 0.00019311871891051818, "loss": 1.5642, "step": 4055 }, { "epoch": 0.14525399752896306, "grad_norm": 1.7490657567977905, "learning_rate": 0.00019311448996393163, "loss": 1.4698, "step": 4056 }, { "epoch": 0.14528980965853133, "grad_norm": 1.9256107807159424, "learning_rate": 0.00019311025976460978, "loss": 1.5543, "step": 4057 }, { "epoch": 0.14532562178809963, "grad_norm": 1.8809314966201782, "learning_rate": 0.00019310602831260944, "loss": 1.6738, "step": 4058 }, { "epoch": 0.14536143391766793, "grad_norm": 1.5549511909484863, "learning_rate": 0.0001931017956079876, "loss": 1.8064, "step": 4059 }, { "epoch": 0.1453972460472362, "grad_norm": 1.6281778812408447, "learning_rate": 0.0001930975616508012, "loss": 1.6228, "step": 4060 }, { "epoch": 0.1454330581768045, "grad_norm": 1.7993215322494507, "learning_rate": 0.00019309332644110722, "loss": 1.9817, "step": 4061 }, { "epoch": 0.14546887030637276, "grad_norm": 1.8530840873718262, "learning_rate": 0.0001930890899789626, "loss": 1.3957, "step": 4062 }, { "epoch": 0.14550468243594106, "grad_norm": 1.63814115524292, "learning_rate": 0.0001930848522644243, "loss": 1.6432, "step": 4063 }, { "epoch": 0.14554049456550933, "grad_norm": 1.4699954986572266, "learning_rate": 0.00019308061329754942, "loss": 1.6721, "step": 4064 }, { "epoch": 0.14557630669507762, "grad_norm": 1.9068433046340942, "learning_rate": 0.00019307637307839498, "loss": 1.4541, "step": 4065 }, { "epoch": 0.14561211882464592, "grad_norm": 2.003532886505127, "learning_rate": 0.00019307213160701798, "loss": 1.5527, "step": 4066 }, { "epoch": 0.1456479309542142, "grad_norm": 1.656280755996704, "learning_rate": 0.0001930678888834755, "loss": 1.4751, "step": 4067 }, { "epoch": 0.14568374308378249, "grad_norm": 1.5825142860412598, "learning_rate": 0.00019306364490782462, "loss": 1.749, "step": 4068 }, { "epoch": 0.14571955521335075, "grad_norm": 1.796554446220398, "learning_rate": 0.00019305939968012245, "loss": 1.6165, "step": 4069 }, { "epoch": 0.14575536734291905, "grad_norm": 1.4555829763412476, "learning_rate": 0.00019305515320042611, "loss": 1.5499, "step": 4070 }, { "epoch": 0.14579117947248732, "grad_norm": 1.407891869544983, "learning_rate": 0.00019305090546879267, "loss": 1.4587, "step": 4071 }, { "epoch": 0.14582699160205562, "grad_norm": 1.5201910734176636, "learning_rate": 0.00019304665648527935, "loss": 1.4193, "step": 4072 }, { "epoch": 0.1458628037316239, "grad_norm": 1.4494929313659668, "learning_rate": 0.00019304240624994328, "loss": 1.7287, "step": 4073 }, { "epoch": 0.14589861586119218, "grad_norm": 2.1447548866271973, "learning_rate": 0.00019303815476284168, "loss": 1.4123, "step": 4074 }, { "epoch": 0.14593442799076048, "grad_norm": 2.7469265460968018, "learning_rate": 0.0001930339020240317, "loss": 1.7491, "step": 4075 }, { "epoch": 0.14597024012032875, "grad_norm": 1.5956698656082153, "learning_rate": 0.00019302964803357057, "loss": 1.7761, "step": 4076 }, { "epoch": 0.14600605224989704, "grad_norm": 1.6789740324020386, "learning_rate": 0.00019302539279151553, "loss": 1.571, "step": 4077 }, { "epoch": 0.1460418643794653, "grad_norm": 1.620974063873291, "learning_rate": 0.00019302113629792383, "loss": 1.1998, "step": 4078 }, { "epoch": 0.1460776765090336, "grad_norm": 1.7867196798324585, "learning_rate": 0.0001930168785528527, "loss": 1.6286, "step": 4079 }, { "epoch": 0.1461134886386019, "grad_norm": 1.3706187009811401, "learning_rate": 0.00019301261955635948, "loss": 1.7489, "step": 4080 }, { "epoch": 0.14614930076817018, "grad_norm": 1.409914493560791, "learning_rate": 0.00019300835930850143, "loss": 1.466, "step": 4081 }, { "epoch": 0.14618511289773847, "grad_norm": 1.6765789985656738, "learning_rate": 0.0001930040978093359, "loss": 1.4924, "step": 4082 }, { "epoch": 0.14622092502730674, "grad_norm": 1.7411587238311768, "learning_rate": 0.00019299983505892016, "loss": 1.467, "step": 4083 }, { "epoch": 0.14625673715687504, "grad_norm": 2.216538906097412, "learning_rate": 0.00019299557105731166, "loss": 1.4325, "step": 4084 }, { "epoch": 0.1462925492864433, "grad_norm": 1.6220505237579346, "learning_rate": 0.00019299130580456765, "loss": 1.5235, "step": 4085 }, { "epoch": 0.1463283614160116, "grad_norm": 1.5487260818481445, "learning_rate": 0.0001929870393007456, "loss": 1.7881, "step": 4086 }, { "epoch": 0.1463641735455799, "grad_norm": 1.7539433240890503, "learning_rate": 0.00019298277154590284, "loss": 1.3377, "step": 4087 }, { "epoch": 0.14639998567514817, "grad_norm": 1.9030964374542236, "learning_rate": 0.0001929785025400969, "loss": 1.5991, "step": 4088 }, { "epoch": 0.14643579780471647, "grad_norm": 1.8642268180847168, "learning_rate": 0.0001929742322833851, "loss": 1.5565, "step": 4089 }, { "epoch": 0.14647160993428474, "grad_norm": 2.047663450241089, "learning_rate": 0.00019296996077582492, "loss": 1.4049, "step": 4090 }, { "epoch": 0.14650742206385303, "grad_norm": 1.8870108127593994, "learning_rate": 0.00019296568801747385, "loss": 1.6984, "step": 4091 }, { "epoch": 0.1465432341934213, "grad_norm": 1.8283346891403198, "learning_rate": 0.00019296141400838938, "loss": 1.5549, "step": 4092 }, { "epoch": 0.1465790463229896, "grad_norm": 1.5765265226364136, "learning_rate": 0.00019295713874862896, "loss": 1.3471, "step": 4093 }, { "epoch": 0.1466148584525579, "grad_norm": 1.2447913885116577, "learning_rate": 0.0001929528622382502, "loss": 1.5457, "step": 4094 }, { "epoch": 0.14665067058212616, "grad_norm": 1.6574128866195679, "learning_rate": 0.00019294858447731054, "loss": 1.4687, "step": 4095 }, { "epoch": 0.14668648271169446, "grad_norm": 2.061941385269165, "learning_rate": 0.0001929443054658676, "loss": 1.8521, "step": 4096 }, { "epoch": 0.14672229484126273, "grad_norm": 1.5628901720046997, "learning_rate": 0.00019294002520397888, "loss": 1.6355, "step": 4097 }, { "epoch": 0.14675810697083103, "grad_norm": 1.9049891233444214, "learning_rate": 0.000192935743691702, "loss": 1.5275, "step": 4098 }, { "epoch": 0.1467939191003993, "grad_norm": 1.6401816606521606, "learning_rate": 0.00019293146092909462, "loss": 1.5944, "step": 4099 }, { "epoch": 0.1468297312299676, "grad_norm": 1.786863923072815, "learning_rate": 0.00019292717691621428, "loss": 1.579, "step": 4100 }, { "epoch": 0.1468655433595359, "grad_norm": 1.6657721996307373, "learning_rate": 0.00019292289165311863, "loss": 1.6496, "step": 4101 }, { "epoch": 0.14690135548910416, "grad_norm": 1.5139931440353394, "learning_rate": 0.00019291860513986534, "loss": 1.8056, "step": 4102 }, { "epoch": 0.14693716761867245, "grad_norm": 2.9963998794555664, "learning_rate": 0.0001929143173765121, "loss": 1.6924, "step": 4103 }, { "epoch": 0.14697297974824072, "grad_norm": 1.4028496742248535, "learning_rate": 0.00019291002836311654, "loss": 1.7265, "step": 4104 }, { "epoch": 0.14700879187780902, "grad_norm": 1.964406132698059, "learning_rate": 0.0001929057380997364, "loss": 1.3505, "step": 4105 }, { "epoch": 0.1470446040073773, "grad_norm": 1.6631462574005127, "learning_rate": 0.0001929014465864294, "loss": 1.5789, "step": 4106 }, { "epoch": 0.14708041613694559, "grad_norm": 1.476067066192627, "learning_rate": 0.00019289715382325327, "loss": 1.4211, "step": 4107 }, { "epoch": 0.14711622826651388, "grad_norm": 2.226459264755249, "learning_rate": 0.00019289285981026577, "loss": 1.8789, "step": 4108 }, { "epoch": 0.14715204039608215, "grad_norm": 1.4748985767364502, "learning_rate": 0.00019288856454752464, "loss": 1.4772, "step": 4109 }, { "epoch": 0.14718785252565045, "grad_norm": 1.3329429626464844, "learning_rate": 0.0001928842680350877, "loss": 1.4615, "step": 4110 }, { "epoch": 0.14722366465521872, "grad_norm": 1.498295783996582, "learning_rate": 0.00019287997027301275, "loss": 1.5852, "step": 4111 }, { "epoch": 0.147259476784787, "grad_norm": 1.641626238822937, "learning_rate": 0.00019287567126135763, "loss": 1.3915, "step": 4112 }, { "epoch": 0.14729528891435528, "grad_norm": 1.6395761966705322, "learning_rate": 0.00019287137100018013, "loss": 2.055, "step": 4113 }, { "epoch": 0.14733110104392358, "grad_norm": 1.7023566961288452, "learning_rate": 0.00019286706948953812, "loss": 1.8299, "step": 4114 }, { "epoch": 0.14736691317349185, "grad_norm": 1.3379522562026978, "learning_rate": 0.00019286276672948952, "loss": 1.6071, "step": 4115 }, { "epoch": 0.14740272530306014, "grad_norm": 1.9350535869598389, "learning_rate": 0.00019285846272009213, "loss": 1.64, "step": 4116 }, { "epoch": 0.14743853743262844, "grad_norm": 1.9311000108718872, "learning_rate": 0.00019285415746140392, "loss": 1.7087, "step": 4117 }, { "epoch": 0.1474743495621967, "grad_norm": 1.5109360218048096, "learning_rate": 0.0001928498509534828, "loss": 1.5686, "step": 4118 }, { "epoch": 0.147510161691765, "grad_norm": 1.3077818155288696, "learning_rate": 0.0001928455431963867, "loss": 1.488, "step": 4119 }, { "epoch": 0.14754597382133328, "grad_norm": 1.6297913789749146, "learning_rate": 0.00019284123419017357, "loss": 1.8312, "step": 4120 }, { "epoch": 0.14758178595090157, "grad_norm": 1.9442793130874634, "learning_rate": 0.0001928369239349014, "loss": 1.6814, "step": 4121 }, { "epoch": 0.14761759808046984, "grad_norm": 1.7997331619262695, "learning_rate": 0.00019283261243062817, "loss": 1.6751, "step": 4122 }, { "epoch": 0.14765341021003814, "grad_norm": 1.4392348527908325, "learning_rate": 0.0001928282996774119, "loss": 1.5335, "step": 4123 }, { "epoch": 0.14768922233960644, "grad_norm": 1.7808204889297485, "learning_rate": 0.00019282398567531058, "loss": 1.5752, "step": 4124 }, { "epoch": 0.1477250344691747, "grad_norm": 1.8829363584518433, "learning_rate": 0.00019281967042438227, "loss": 1.5472, "step": 4125 }, { "epoch": 0.147760846598743, "grad_norm": 1.8297572135925293, "learning_rate": 0.000192815353924685, "loss": 1.6218, "step": 4126 }, { "epoch": 0.14779665872831127, "grad_norm": 1.4455066919326782, "learning_rate": 0.0001928110361762769, "loss": 1.5734, "step": 4127 }, { "epoch": 0.14783247085787957, "grad_norm": 1.5757455825805664, "learning_rate": 0.000192806717179216, "loss": 1.561, "step": 4128 }, { "epoch": 0.14786828298744784, "grad_norm": 1.614942193031311, "learning_rate": 0.00019280239693356048, "loss": 1.5828, "step": 4129 }, { "epoch": 0.14790409511701613, "grad_norm": 1.358445405960083, "learning_rate": 0.0001927980754393684, "loss": 1.7682, "step": 4130 }, { "epoch": 0.14793990724658443, "grad_norm": 2.172544240951538, "learning_rate": 0.00019279375269669785, "loss": 1.5469, "step": 4131 }, { "epoch": 0.1479757193761527, "grad_norm": 1.9252666234970093, "learning_rate": 0.00019278942870560713, "loss": 1.8746, "step": 4132 }, { "epoch": 0.148011531505721, "grad_norm": 1.5914863348007202, "learning_rate": 0.0001927851034661543, "loss": 1.3982, "step": 4133 }, { "epoch": 0.14804734363528926, "grad_norm": 1.750622272491455, "learning_rate": 0.0001927807769783976, "loss": 1.4232, "step": 4134 }, { "epoch": 0.14808315576485756, "grad_norm": 2.3878767490386963, "learning_rate": 0.0001927764492423952, "loss": 1.6743, "step": 4135 }, { "epoch": 0.14811896789442583, "grad_norm": 1.5392639636993408, "learning_rate": 0.0001927721202582054, "loss": 1.5216, "step": 4136 }, { "epoch": 0.14815478002399413, "grad_norm": 1.3905284404754639, "learning_rate": 0.00019276779002588634, "loss": 1.6186, "step": 4137 }, { "epoch": 0.14819059215356242, "grad_norm": 1.1709767580032349, "learning_rate": 0.00019276345854549634, "loss": 1.6602, "step": 4138 }, { "epoch": 0.1482264042831307, "grad_norm": 1.6870055198669434, "learning_rate": 0.00019275912581709367, "loss": 1.8051, "step": 4139 }, { "epoch": 0.148262216412699, "grad_norm": 1.4576029777526855, "learning_rate": 0.0001927547918407366, "loss": 1.6819, "step": 4140 }, { "epoch": 0.14829802854226726, "grad_norm": 1.3755351305007935, "learning_rate": 0.00019275045661648344, "loss": 1.6577, "step": 4141 }, { "epoch": 0.14833384067183555, "grad_norm": 1.6038774251937866, "learning_rate": 0.00019274612014439258, "loss": 1.6699, "step": 4142 }, { "epoch": 0.14836965280140382, "grad_norm": 1.760777473449707, "learning_rate": 0.00019274178242452224, "loss": 1.6629, "step": 4143 }, { "epoch": 0.14840546493097212, "grad_norm": 2.301041841506958, "learning_rate": 0.0001927374434569309, "loss": 1.7042, "step": 4144 }, { "epoch": 0.14844127706054042, "grad_norm": 1.588143229484558, "learning_rate": 0.00019273310324167687, "loss": 1.8193, "step": 4145 }, { "epoch": 0.14847708919010869, "grad_norm": 1.4113539457321167, "learning_rate": 0.00019272876177881852, "loss": 1.3863, "step": 4146 }, { "epoch": 0.14851290131967698, "grad_norm": 2.251722812652588, "learning_rate": 0.00019272441906841432, "loss": 1.5958, "step": 4147 }, { "epoch": 0.14854871344924525, "grad_norm": 1.7772836685180664, "learning_rate": 0.00019272007511052266, "loss": 1.7736, "step": 4148 }, { "epoch": 0.14858452557881355, "grad_norm": 1.554674744606018, "learning_rate": 0.000192715729905202, "loss": 1.6975, "step": 4149 }, { "epoch": 0.14862033770838182, "grad_norm": 1.403333306312561, "learning_rate": 0.00019271138345251077, "loss": 1.7412, "step": 4150 }, { "epoch": 0.1486561498379501, "grad_norm": 1.7260535955429077, "learning_rate": 0.00019270703575250748, "loss": 1.5755, "step": 4151 }, { "epoch": 0.1486919619675184, "grad_norm": 2.66255784034729, "learning_rate": 0.0001927026868052506, "loss": 1.34, "step": 4152 }, { "epoch": 0.14872777409708668, "grad_norm": 1.3532320261001587, "learning_rate": 0.00019269833661079866, "loss": 1.6399, "step": 4153 }, { "epoch": 0.14876358622665498, "grad_norm": 3.2480742931365967, "learning_rate": 0.00019269398516921015, "loss": 2.3012, "step": 4154 }, { "epoch": 0.14879939835622324, "grad_norm": 1.9341964721679688, "learning_rate": 0.00019268963248054367, "loss": 1.818, "step": 4155 }, { "epoch": 0.14883521048579154, "grad_norm": 1.7220872640609741, "learning_rate": 0.00019268527854485773, "loss": 1.5048, "step": 4156 }, { "epoch": 0.1488710226153598, "grad_norm": 1.5572283267974854, "learning_rate": 0.0001926809233622109, "loss": 1.9191, "step": 4157 }, { "epoch": 0.1489068347449281, "grad_norm": 2.1267168521881104, "learning_rate": 0.0001926765669326618, "loss": 1.3807, "step": 4158 }, { "epoch": 0.1489426468744964, "grad_norm": 2.1267030239105225, "learning_rate": 0.00019267220925626907, "loss": 1.7096, "step": 4159 }, { "epoch": 0.14897845900406467, "grad_norm": 1.8242478370666504, "learning_rate": 0.00019266785033309128, "loss": 1.2927, "step": 4160 }, { "epoch": 0.14901427113363297, "grad_norm": 1.9160438776016235, "learning_rate": 0.0001926634901631871, "loss": 1.6361, "step": 4161 }, { "epoch": 0.14905008326320124, "grad_norm": 2.731289863586426, "learning_rate": 0.00019265912874661515, "loss": 1.3703, "step": 4162 }, { "epoch": 0.14908589539276954, "grad_norm": 1.2577362060546875, "learning_rate": 0.0001926547660834342, "loss": 1.7034, "step": 4163 }, { "epoch": 0.1491217075223378, "grad_norm": 2.197948932647705, "learning_rate": 0.00019265040217370286, "loss": 1.4072, "step": 4164 }, { "epoch": 0.1491575196519061, "grad_norm": 1.9935760498046875, "learning_rate": 0.0001926460370174799, "loss": 1.3064, "step": 4165 }, { "epoch": 0.1491933317814744, "grad_norm": 1.7972018718719482, "learning_rate": 0.00019264167061482397, "loss": 1.4971, "step": 4166 }, { "epoch": 0.14922914391104267, "grad_norm": 1.4733848571777344, "learning_rate": 0.0001926373029657939, "loss": 1.6895, "step": 4167 }, { "epoch": 0.14926495604061096, "grad_norm": 1.9356968402862549, "learning_rate": 0.00019263293407044838, "loss": 1.792, "step": 4168 }, { "epoch": 0.14930076817017923, "grad_norm": 1.5070140361785889, "learning_rate": 0.00019262856392884625, "loss": 1.6962, "step": 4169 }, { "epoch": 0.14933658029974753, "grad_norm": 1.9698539972305298, "learning_rate": 0.00019262419254104628, "loss": 1.7214, "step": 4170 }, { "epoch": 0.1493723924293158, "grad_norm": 1.7777533531188965, "learning_rate": 0.00019261981990710723, "loss": 1.6434, "step": 4171 }, { "epoch": 0.1494082045588841, "grad_norm": 1.478132963180542, "learning_rate": 0.000192615446027088, "loss": 1.493, "step": 4172 }, { "epoch": 0.1494440166884524, "grad_norm": 1.4548566341400146, "learning_rate": 0.00019261107090104743, "loss": 1.5991, "step": 4173 }, { "epoch": 0.14947982881802066, "grad_norm": 1.5640240907669067, "learning_rate": 0.00019260669452904433, "loss": 1.3423, "step": 4174 }, { "epoch": 0.14951564094758896, "grad_norm": 1.3845349550247192, "learning_rate": 0.00019260231691113763, "loss": 1.8214, "step": 4175 }, { "epoch": 0.14955145307715723, "grad_norm": 3.009308099746704, "learning_rate": 0.00019259793804738619, "loss": 1.6082, "step": 4176 }, { "epoch": 0.14958726520672552, "grad_norm": 1.3763492107391357, "learning_rate": 0.00019259355793784893, "loss": 1.6317, "step": 4177 }, { "epoch": 0.1496230773362938, "grad_norm": 1.6250135898590088, "learning_rate": 0.00019258917658258483, "loss": 1.7019, "step": 4178 }, { "epoch": 0.1496588894658621, "grad_norm": 1.358526349067688, "learning_rate": 0.00019258479398165273, "loss": 1.6496, "step": 4179 }, { "epoch": 0.14969470159543039, "grad_norm": 2.3600802421569824, "learning_rate": 0.00019258041013511167, "loss": 1.5639, "step": 4180 }, { "epoch": 0.14973051372499865, "grad_norm": 2.442429542541504, "learning_rate": 0.00019257602504302063, "loss": 1.8827, "step": 4181 }, { "epoch": 0.14976632585456695, "grad_norm": 2.7102713584899902, "learning_rate": 0.0001925716387054386, "loss": 1.8546, "step": 4182 }, { "epoch": 0.14980213798413522, "grad_norm": 2.178633689880371, "learning_rate": 0.00019256725112242455, "loss": 2.0248, "step": 4183 }, { "epoch": 0.14983795011370352, "grad_norm": 1.9340856075286865, "learning_rate": 0.00019256286229403754, "loss": 1.5944, "step": 4184 }, { "epoch": 0.14987376224327179, "grad_norm": 1.5424754619598389, "learning_rate": 0.00019255847222033663, "loss": 1.711, "step": 4185 }, { "epoch": 0.14990957437284008, "grad_norm": 2.0131659507751465, "learning_rate": 0.00019255408090138086, "loss": 1.7849, "step": 4186 }, { "epoch": 0.14994538650240838, "grad_norm": 1.3488818407058716, "learning_rate": 0.00019254968833722934, "loss": 1.567, "step": 4187 }, { "epoch": 0.14998119863197665, "grad_norm": 1.4595369100570679, "learning_rate": 0.0001925452945279411, "loss": 1.5943, "step": 4188 }, { "epoch": 0.15001701076154494, "grad_norm": 2.130014419555664, "learning_rate": 0.00019254089947357534, "loss": 1.6016, "step": 4189 }, { "epoch": 0.1500528228911132, "grad_norm": 1.4605454206466675, "learning_rate": 0.00019253650317419113, "loss": 1.6543, "step": 4190 }, { "epoch": 0.1500886350206815, "grad_norm": 1.6974399089813232, "learning_rate": 0.0001925321056298476, "loss": 1.473, "step": 4191 }, { "epoch": 0.15012444715024978, "grad_norm": 1.9349365234375, "learning_rate": 0.000192527706840604, "loss": 1.4904, "step": 4192 }, { "epoch": 0.15016025927981808, "grad_norm": 1.4719817638397217, "learning_rate": 0.00019252330680651945, "loss": 1.6002, "step": 4193 }, { "epoch": 0.15019607140938637, "grad_norm": 1.5579456090927124, "learning_rate": 0.0001925189055276531, "loss": 1.2423, "step": 4194 }, { "epoch": 0.15023188353895464, "grad_norm": 1.1475658416748047, "learning_rate": 0.00019251450300406426, "loss": 1.4654, "step": 4195 }, { "epoch": 0.15026769566852294, "grad_norm": 2.370964527130127, "learning_rate": 0.00019251009923581213, "loss": 1.782, "step": 4196 }, { "epoch": 0.1503035077980912, "grad_norm": 1.2127045392990112, "learning_rate": 0.0001925056942229559, "loss": 1.5602, "step": 4197 }, { "epoch": 0.1503393199276595, "grad_norm": 1.3405200242996216, "learning_rate": 0.00019250128796555492, "loss": 1.7569, "step": 4198 }, { "epoch": 0.15037513205722777, "grad_norm": 1.5121461153030396, "learning_rate": 0.0001924968804636684, "loss": 1.5301, "step": 4199 }, { "epoch": 0.15041094418679607, "grad_norm": 1.8312376737594604, "learning_rate": 0.0001924924717173557, "loss": 1.5413, "step": 4200 }, { "epoch": 0.15044675631636437, "grad_norm": 1.5825523138046265, "learning_rate": 0.00019248806172667606, "loss": 1.7082, "step": 4201 }, { "epoch": 0.15048256844593264, "grad_norm": 1.664043664932251, "learning_rate": 0.00019248365049168888, "loss": 1.6665, "step": 4202 }, { "epoch": 0.15051838057550093, "grad_norm": 1.4566935300827026, "learning_rate": 0.00019247923801245345, "loss": 1.4875, "step": 4203 }, { "epoch": 0.1505541927050692, "grad_norm": 1.3645408153533936, "learning_rate": 0.0001924748242890292, "loss": 1.578, "step": 4204 }, { "epoch": 0.1505900048346375, "grad_norm": 1.360119104385376, "learning_rate": 0.00019247040932147546, "loss": 1.6422, "step": 4205 }, { "epoch": 0.15062581696420577, "grad_norm": 1.9559032917022705, "learning_rate": 0.00019246599310985163, "loss": 1.64, "step": 4206 }, { "epoch": 0.15066162909377406, "grad_norm": 1.3689755201339722, "learning_rate": 0.0001924615756542171, "loss": 1.5453, "step": 4207 }, { "epoch": 0.15069744122334236, "grad_norm": 2.615208864212036, "learning_rate": 0.0001924571569546314, "loss": 1.7152, "step": 4208 }, { "epoch": 0.15073325335291063, "grad_norm": 1.3856520652770996, "learning_rate": 0.00019245273701115387, "loss": 1.5176, "step": 4209 }, { "epoch": 0.15076906548247893, "grad_norm": 1.9545027017593384, "learning_rate": 0.00019244831582384406, "loss": 1.38, "step": 4210 }, { "epoch": 0.1508048776120472, "grad_norm": 1.9589897394180298, "learning_rate": 0.0001924438933927614, "loss": 1.5865, "step": 4211 }, { "epoch": 0.1508406897416155, "grad_norm": 1.9281854629516602, "learning_rate": 0.00019243946971796535, "loss": 1.2587, "step": 4212 }, { "epoch": 0.15087650187118376, "grad_norm": 2.179154396057129, "learning_rate": 0.00019243504479951552, "loss": 1.6455, "step": 4213 }, { "epoch": 0.15091231400075206, "grad_norm": 1.6948529481887817, "learning_rate": 0.00019243061863747138, "loss": 1.5106, "step": 4214 }, { "epoch": 0.15094812613032033, "grad_norm": 1.5750123262405396, "learning_rate": 0.0001924261912318925, "loss": 1.7466, "step": 4215 }, { "epoch": 0.15098393825988862, "grad_norm": 1.5252922773361206, "learning_rate": 0.00019242176258283845, "loss": 1.4174, "step": 4216 }, { "epoch": 0.15101975038945692, "grad_norm": 1.4911702871322632, "learning_rate": 0.00019241733269036878, "loss": 1.792, "step": 4217 }, { "epoch": 0.1510555625190252, "grad_norm": 2.6139893531799316, "learning_rate": 0.0001924129015545431, "loss": 1.496, "step": 4218 }, { "epoch": 0.15109137464859348, "grad_norm": 1.4543615579605103, "learning_rate": 0.00019240846917542107, "loss": 1.3647, "step": 4219 }, { "epoch": 0.15112718677816175, "grad_norm": 2.1806602478027344, "learning_rate": 0.00019240403555306225, "loss": 1.8613, "step": 4220 }, { "epoch": 0.15116299890773005, "grad_norm": 1.576529622077942, "learning_rate": 0.00019239960068752633, "loss": 1.5466, "step": 4221 }, { "epoch": 0.15119881103729832, "grad_norm": 1.780757188796997, "learning_rate": 0.00019239516457887298, "loss": 1.732, "step": 4222 }, { "epoch": 0.15123462316686662, "grad_norm": 1.6536751985549927, "learning_rate": 0.00019239072722716186, "loss": 1.5458, "step": 4223 }, { "epoch": 0.1512704352964349, "grad_norm": 3.4212424755096436, "learning_rate": 0.0001923862886324527, "loss": 1.6249, "step": 4224 }, { "epoch": 0.15130624742600318, "grad_norm": 2.4516830444335938, "learning_rate": 0.00019238184879480518, "loss": 1.722, "step": 4225 }, { "epoch": 0.15134205955557148, "grad_norm": 1.8271950483322144, "learning_rate": 0.00019237740771427906, "loss": 1.3643, "step": 4226 }, { "epoch": 0.15137787168513975, "grad_norm": 2.7619059085845947, "learning_rate": 0.00019237296539093408, "loss": 1.786, "step": 4227 }, { "epoch": 0.15141368381470804, "grad_norm": 2.0190656185150146, "learning_rate": 0.00019236852182482998, "loss": 1.1294, "step": 4228 }, { "epoch": 0.1514494959442763, "grad_norm": 2.5435988903045654, "learning_rate": 0.0001923640770160266, "loss": 1.965, "step": 4229 }, { "epoch": 0.1514853080738446, "grad_norm": 1.3937095403671265, "learning_rate": 0.00019235963096458366, "loss": 1.7719, "step": 4230 }, { "epoch": 0.1515211202034129, "grad_norm": 1.7603644132614136, "learning_rate": 0.00019235518367056106, "loss": 1.9055, "step": 4231 }, { "epoch": 0.15155693233298118, "grad_norm": 1.535352349281311, "learning_rate": 0.0001923507351340186, "loss": 1.7588, "step": 4232 }, { "epoch": 0.15159274446254947, "grad_norm": 1.4259074926376343, "learning_rate": 0.00019234628535501607, "loss": 1.5549, "step": 4233 }, { "epoch": 0.15162855659211774, "grad_norm": 1.2318248748779297, "learning_rate": 0.00019234183433361344, "loss": 1.6689, "step": 4234 }, { "epoch": 0.15166436872168604, "grad_norm": 1.5642166137695312, "learning_rate": 0.0001923373820698705, "loss": 1.5143, "step": 4235 }, { "epoch": 0.1517001808512543, "grad_norm": 2.0307435989379883, "learning_rate": 0.00019233292856384723, "loss": 1.6097, "step": 4236 }, { "epoch": 0.1517359929808226, "grad_norm": 1.8250125646591187, "learning_rate": 0.00019232847381560347, "loss": 1.7707, "step": 4237 }, { "epoch": 0.1517718051103909, "grad_norm": 1.4493552446365356, "learning_rate": 0.00019232401782519923, "loss": 1.683, "step": 4238 }, { "epoch": 0.15180761723995917, "grad_norm": 1.6072957515716553, "learning_rate": 0.0001923195605926944, "loss": 1.5616, "step": 4239 }, { "epoch": 0.15184342936952747, "grad_norm": 2.072479724884033, "learning_rate": 0.00019231510211814896, "loss": 1.5442, "step": 4240 }, { "epoch": 0.15187924149909574, "grad_norm": 2.20919132232666, "learning_rate": 0.0001923106424016229, "loss": 1.8976, "step": 4241 }, { "epoch": 0.15191505362866403, "grad_norm": 1.0948854684829712, "learning_rate": 0.00019230618144317624, "loss": 1.4725, "step": 4242 }, { "epoch": 0.1519508657582323, "grad_norm": 1.5968300104141235, "learning_rate": 0.00019230171924286896, "loss": 1.4642, "step": 4243 }, { "epoch": 0.1519866778878006, "grad_norm": 1.6487561464309692, "learning_rate": 0.0001922972558007611, "loss": 1.7243, "step": 4244 }, { "epoch": 0.1520224900173689, "grad_norm": 1.4044201374053955, "learning_rate": 0.00019229279111691272, "loss": 1.6046, "step": 4245 }, { "epoch": 0.15205830214693716, "grad_norm": 1.3864117860794067, "learning_rate": 0.0001922883251913839, "loss": 1.6748, "step": 4246 }, { "epoch": 0.15209411427650546, "grad_norm": 2.1303157806396484, "learning_rate": 0.00019228385802423469, "loss": 1.7468, "step": 4247 }, { "epoch": 0.15212992640607373, "grad_norm": 2.085751533508301, "learning_rate": 0.0001922793896155252, "loss": 1.9624, "step": 4248 }, { "epoch": 0.15216573853564203, "grad_norm": 1.5157827138900757, "learning_rate": 0.00019227491996531558, "loss": 1.6942, "step": 4249 }, { "epoch": 0.1522015506652103, "grad_norm": 2.0757040977478027, "learning_rate": 0.00019227044907366595, "loss": 1.4183, "step": 4250 }, { "epoch": 0.1522373627947786, "grad_norm": 1.8636008501052856, "learning_rate": 0.00019226597694063638, "loss": 1.5945, "step": 4251 }, { "epoch": 0.1522731749243469, "grad_norm": 1.7461687326431274, "learning_rate": 0.0001922615035662872, "loss": 1.5256, "step": 4252 }, { "epoch": 0.15230898705391516, "grad_norm": 1.3671756982803345, "learning_rate": 0.00019225702895067843, "loss": 1.4619, "step": 4253 }, { "epoch": 0.15234479918348345, "grad_norm": 1.5393552780151367, "learning_rate": 0.00019225255309387036, "loss": 1.728, "step": 4254 }, { "epoch": 0.15238061131305172, "grad_norm": 2.7406222820281982, "learning_rate": 0.00019224807599592318, "loss": 1.7323, "step": 4255 }, { "epoch": 0.15241642344262002, "grad_norm": 1.4855775833129883, "learning_rate": 0.00019224359765689713, "loss": 1.7204, "step": 4256 }, { "epoch": 0.1524522355721883, "grad_norm": 1.3277029991149902, "learning_rate": 0.00019223911807685244, "loss": 1.5232, "step": 4257 }, { "epoch": 0.15248804770175658, "grad_norm": 1.8301455974578857, "learning_rate": 0.00019223463725584944, "loss": 1.5253, "step": 4258 }, { "epoch": 0.15252385983132488, "grad_norm": 1.7147241830825806, "learning_rate": 0.00019223015519394834, "loss": 1.6035, "step": 4259 }, { "epoch": 0.15255967196089315, "grad_norm": 2.0909276008605957, "learning_rate": 0.00019222567189120947, "loss": 1.3304, "step": 4260 }, { "epoch": 0.15259548409046145, "grad_norm": 2.0987610816955566, "learning_rate": 0.00019222118734769317, "loss": 1.3313, "step": 4261 }, { "epoch": 0.15263129622002972, "grad_norm": 2.262599229812622, "learning_rate": 0.00019221670156345971, "loss": 1.5674, "step": 4262 }, { "epoch": 0.152667108349598, "grad_norm": 1.5876708030700684, "learning_rate": 0.00019221221453856954, "loss": 1.7479, "step": 4263 }, { "epoch": 0.15270292047916628, "grad_norm": 1.6196600198745728, "learning_rate": 0.00019220772627308292, "loss": 1.5288, "step": 4264 }, { "epoch": 0.15273873260873458, "grad_norm": 1.6314212083816528, "learning_rate": 0.00019220323676706028, "loss": 1.6841, "step": 4265 }, { "epoch": 0.15277454473830288, "grad_norm": 2.1747360229492188, "learning_rate": 0.00019219874602056204, "loss": 1.8169, "step": 4266 }, { "epoch": 0.15281035686787114, "grad_norm": 1.5729329586029053, "learning_rate": 0.0001921942540336486, "loss": 1.5527, "step": 4267 }, { "epoch": 0.15284616899743944, "grad_norm": 2.512627124786377, "learning_rate": 0.00019218976080638043, "loss": 2.074, "step": 4268 }, { "epoch": 0.1528819811270077, "grad_norm": 1.6818349361419678, "learning_rate": 0.0001921852663388179, "loss": 1.5731, "step": 4269 }, { "epoch": 0.152917793256576, "grad_norm": 1.7092454433441162, "learning_rate": 0.0001921807706310215, "loss": 1.5929, "step": 4270 }, { "epoch": 0.15295360538614428, "grad_norm": 1.1693865060806274, "learning_rate": 0.00019217627368305176, "loss": 1.7299, "step": 4271 }, { "epoch": 0.15298941751571257, "grad_norm": 1.8270220756530762, "learning_rate": 0.0001921717754949692, "loss": 1.6689, "step": 4272 }, { "epoch": 0.15302522964528087, "grad_norm": 1.5601493120193481, "learning_rate": 0.00019216727606683425, "loss": 1.5714, "step": 4273 }, { "epoch": 0.15306104177484914, "grad_norm": 1.6041169166564941, "learning_rate": 0.00019216277539870752, "loss": 1.5596, "step": 4274 }, { "epoch": 0.15309685390441743, "grad_norm": 1.5292716026306152, "learning_rate": 0.00019215827349064948, "loss": 1.6235, "step": 4275 }, { "epoch": 0.1531326660339857, "grad_norm": 2.1486856937408447, "learning_rate": 0.00019215377034272074, "loss": 1.7512, "step": 4276 }, { "epoch": 0.153168478163554, "grad_norm": 1.281849980354309, "learning_rate": 0.00019214926595498196, "loss": 1.5935, "step": 4277 }, { "epoch": 0.15320429029312227, "grad_norm": 2.1205649375915527, "learning_rate": 0.0001921447603274936, "loss": 1.6512, "step": 4278 }, { "epoch": 0.15324010242269057, "grad_norm": 1.811474323272705, "learning_rate": 0.0001921402534603164, "loss": 1.7293, "step": 4279 }, { "epoch": 0.15327591455225886, "grad_norm": 1.8452061414718628, "learning_rate": 0.00019213574535351092, "loss": 1.6552, "step": 4280 }, { "epoch": 0.15331172668182713, "grad_norm": 2.1379551887512207, "learning_rate": 0.00019213123600713783, "loss": 1.6141, "step": 4281 }, { "epoch": 0.15334753881139543, "grad_norm": 1.5196058750152588, "learning_rate": 0.0001921267254212578, "loss": 1.5217, "step": 4282 }, { "epoch": 0.1533833509409637, "grad_norm": 1.5301889181137085, "learning_rate": 0.00019212221359593152, "loss": 1.2763, "step": 4283 }, { "epoch": 0.153419163070532, "grad_norm": 1.7867982387542725, "learning_rate": 0.00019211770053121968, "loss": 1.7346, "step": 4284 }, { "epoch": 0.15345497520010026, "grad_norm": 2.244570016860962, "learning_rate": 0.000192113186227183, "loss": 1.5217, "step": 4285 }, { "epoch": 0.15349078732966856, "grad_norm": 2.4065821170806885, "learning_rate": 0.0001921086706838822, "loss": 1.4745, "step": 4286 }, { "epoch": 0.15352659945923686, "grad_norm": 1.6467057466506958, "learning_rate": 0.0001921041539013781, "loss": 1.5964, "step": 4287 }, { "epoch": 0.15356241158880513, "grad_norm": 1.5443044900894165, "learning_rate": 0.00019209963587973138, "loss": 1.8898, "step": 4288 }, { "epoch": 0.15359822371837342, "grad_norm": 2.1365907192230225, "learning_rate": 0.00019209511661900285, "loss": 1.4256, "step": 4289 }, { "epoch": 0.1536340358479417, "grad_norm": 1.4355337619781494, "learning_rate": 0.00019209059611925336, "loss": 1.4913, "step": 4290 }, { "epoch": 0.15366984797751, "grad_norm": 1.4051859378814697, "learning_rate": 0.00019208607438054364, "loss": 1.547, "step": 4291 }, { "epoch": 0.15370566010707826, "grad_norm": 1.3148490190505981, "learning_rate": 0.0001920815514029346, "loss": 1.7636, "step": 4292 }, { "epoch": 0.15374147223664655, "grad_norm": 1.518978476524353, "learning_rate": 0.00019207702718648705, "loss": 1.4276, "step": 4293 }, { "epoch": 0.15377728436621485, "grad_norm": 1.5426995754241943, "learning_rate": 0.00019207250173126187, "loss": 1.5931, "step": 4294 }, { "epoch": 0.15381309649578312, "grad_norm": 1.3305436372756958, "learning_rate": 0.00019206797503731996, "loss": 1.5414, "step": 4295 }, { "epoch": 0.15384890862535142, "grad_norm": 1.3565562963485718, "learning_rate": 0.0001920634471047222, "loss": 1.4051, "step": 4296 }, { "epoch": 0.15388472075491968, "grad_norm": 1.4529234170913696, "learning_rate": 0.0001920589179335295, "loss": 1.7174, "step": 4297 }, { "epoch": 0.15392053288448798, "grad_norm": 1.6896700859069824, "learning_rate": 0.00019205438752380283, "loss": 1.5858, "step": 4298 }, { "epoch": 0.15395634501405625, "grad_norm": 2.139944314956665, "learning_rate": 0.00019204985587560307, "loss": 1.871, "step": 4299 }, { "epoch": 0.15399215714362455, "grad_norm": 2.2128217220306396, "learning_rate": 0.00019204532298899127, "loss": 1.7976, "step": 4300 }, { "epoch": 0.15402796927319284, "grad_norm": 1.833006501197815, "learning_rate": 0.0001920407888640284, "loss": 1.8781, "step": 4301 }, { "epoch": 0.1540637814027611, "grad_norm": 1.8609815835952759, "learning_rate": 0.00019203625350077541, "loss": 1.89, "step": 4302 }, { "epoch": 0.1540995935323294, "grad_norm": 1.195251703262329, "learning_rate": 0.00019203171689929333, "loss": 1.6183, "step": 4303 }, { "epoch": 0.15413540566189768, "grad_norm": 1.212415099143982, "learning_rate": 0.00019202717905964325, "loss": 1.4988, "step": 4304 }, { "epoch": 0.15417121779146598, "grad_norm": 1.2291085720062256, "learning_rate": 0.00019202263998188617, "loss": 1.6022, "step": 4305 }, { "epoch": 0.15420702992103424, "grad_norm": 1.3216090202331543, "learning_rate": 0.00019201809966608316, "loss": 1.5615, "step": 4306 }, { "epoch": 0.15424284205060254, "grad_norm": 2.6345887184143066, "learning_rate": 0.0001920135581122953, "loss": 1.4008, "step": 4307 }, { "epoch": 0.1542786541801708, "grad_norm": 2.237443208694458, "learning_rate": 0.00019200901532058376, "loss": 1.5771, "step": 4308 }, { "epoch": 0.1543144663097391, "grad_norm": 1.5229663848876953, "learning_rate": 0.00019200447129100954, "loss": 1.4401, "step": 4309 }, { "epoch": 0.1543502784393074, "grad_norm": 1.4248111248016357, "learning_rate": 0.00019199992602363385, "loss": 1.6815, "step": 4310 }, { "epoch": 0.15438609056887567, "grad_norm": 1.6188117265701294, "learning_rate": 0.00019199537951851788, "loss": 1.7073, "step": 4311 }, { "epoch": 0.15442190269844397, "grad_norm": 1.5151900053024292, "learning_rate": 0.0001919908317757227, "loss": 1.5666, "step": 4312 }, { "epoch": 0.15445771482801224, "grad_norm": 2.0137276649475098, "learning_rate": 0.00019198628279530952, "loss": 1.9482, "step": 4313 }, { "epoch": 0.15449352695758053, "grad_norm": 2.2357749938964844, "learning_rate": 0.00019198173257733961, "loss": 1.5095, "step": 4314 }, { "epoch": 0.1545293390871488, "grad_norm": 1.722032070159912, "learning_rate": 0.00019197718112187409, "loss": 1.2756, "step": 4315 }, { "epoch": 0.1545651512167171, "grad_norm": 1.9286119937896729, "learning_rate": 0.00019197262842897425, "loss": 1.2332, "step": 4316 }, { "epoch": 0.1546009633462854, "grad_norm": 1.6464585065841675, "learning_rate": 0.00019196807449870133, "loss": 1.768, "step": 4317 }, { "epoch": 0.15463677547585367, "grad_norm": 1.3432539701461792, "learning_rate": 0.00019196351933111662, "loss": 1.3892, "step": 4318 }, { "epoch": 0.15467258760542196, "grad_norm": 1.8034002780914307, "learning_rate": 0.00019195896292628138, "loss": 1.5942, "step": 4319 }, { "epoch": 0.15470839973499023, "grad_norm": 2.2822370529174805, "learning_rate": 0.00019195440528425688, "loss": 1.6211, "step": 4320 }, { "epoch": 0.15474421186455853, "grad_norm": 1.6064921617507935, "learning_rate": 0.00019194984640510447, "loss": 1.4712, "step": 4321 }, { "epoch": 0.1547800239941268, "grad_norm": 1.5366289615631104, "learning_rate": 0.00019194528628888554, "loss": 1.847, "step": 4322 }, { "epoch": 0.1548158361236951, "grad_norm": 1.3502074480056763, "learning_rate": 0.00019194072493566134, "loss": 1.9242, "step": 4323 }, { "epoch": 0.1548516482532634, "grad_norm": 1.394463300704956, "learning_rate": 0.00019193616234549328, "loss": 1.4996, "step": 4324 }, { "epoch": 0.15488746038283166, "grad_norm": 1.9019322395324707, "learning_rate": 0.00019193159851844276, "loss": 1.5957, "step": 4325 }, { "epoch": 0.15492327251239996, "grad_norm": 1.9424875974655151, "learning_rate": 0.00019192703345457114, "loss": 1.6297, "step": 4326 }, { "epoch": 0.15495908464196823, "grad_norm": 1.5669649839401245, "learning_rate": 0.00019192246715393988, "loss": 1.7531, "step": 4327 }, { "epoch": 0.15499489677153652, "grad_norm": 1.4756217002868652, "learning_rate": 0.0001919178996166104, "loss": 1.7796, "step": 4328 }, { "epoch": 0.1550307089011048, "grad_norm": 1.9263927936553955, "learning_rate": 0.00019191333084264412, "loss": 1.907, "step": 4329 }, { "epoch": 0.1550665210306731, "grad_norm": 1.990947961807251, "learning_rate": 0.00019190876083210258, "loss": 1.7693, "step": 4330 }, { "epoch": 0.15510233316024138, "grad_norm": 1.4774898290634155, "learning_rate": 0.00019190418958504716, "loss": 1.6509, "step": 4331 }, { "epoch": 0.15513814528980965, "grad_norm": 1.6892197132110596, "learning_rate": 0.00019189961710153948, "loss": 1.7677, "step": 4332 }, { "epoch": 0.15517395741937795, "grad_norm": 1.8601287603378296, "learning_rate": 0.00019189504338164095, "loss": 1.5163, "step": 4333 }, { "epoch": 0.15520976954894622, "grad_norm": 2.1573426723480225, "learning_rate": 0.00019189046842541316, "loss": 1.9389, "step": 4334 }, { "epoch": 0.15524558167851452, "grad_norm": 2.2533397674560547, "learning_rate": 0.00019188589223291763, "loss": 1.6179, "step": 4335 }, { "epoch": 0.15528139380808278, "grad_norm": 1.462187647819519, "learning_rate": 0.00019188131480421595, "loss": 1.5003, "step": 4336 }, { "epoch": 0.15531720593765108, "grad_norm": 1.378979206085205, "learning_rate": 0.0001918767361393697, "loss": 1.674, "step": 4337 }, { "epoch": 0.15535301806721938, "grad_norm": 1.7389543056488037, "learning_rate": 0.00019187215623844053, "loss": 1.5242, "step": 4338 }, { "epoch": 0.15538883019678765, "grad_norm": 1.7959777116775513, "learning_rate": 0.00019186757510148995, "loss": 1.5021, "step": 4339 }, { "epoch": 0.15542464232635594, "grad_norm": 1.8340651988983154, "learning_rate": 0.00019186299272857965, "loss": 1.6096, "step": 4340 }, { "epoch": 0.1554604544559242, "grad_norm": 1.2857201099395752, "learning_rate": 0.0001918584091197713, "loss": 1.5341, "step": 4341 }, { "epoch": 0.1554962665854925, "grad_norm": 1.4234918355941772, "learning_rate": 0.00019185382427512653, "loss": 1.3857, "step": 4342 }, { "epoch": 0.15553207871506078, "grad_norm": 1.4376306533813477, "learning_rate": 0.00019184923819470703, "loss": 1.683, "step": 4343 }, { "epoch": 0.15556789084462908, "grad_norm": 1.7839604616165161, "learning_rate": 0.0001918446508785745, "loss": 1.4875, "step": 4344 }, { "epoch": 0.15560370297419737, "grad_norm": 2.093585968017578, "learning_rate": 0.00019184006232679068, "loss": 1.708, "step": 4345 }, { "epoch": 0.15563951510376564, "grad_norm": 1.6490429639816284, "learning_rate": 0.00019183547253941733, "loss": 1.4749, "step": 4346 }, { "epoch": 0.15567532723333394, "grad_norm": 1.4258878231048584, "learning_rate": 0.0001918308815165161, "loss": 1.5839, "step": 4347 }, { "epoch": 0.1557111393629022, "grad_norm": 1.3602529764175415, "learning_rate": 0.0001918262892581488, "loss": 1.4586, "step": 4348 }, { "epoch": 0.1557469514924705, "grad_norm": 2.043414354324341, "learning_rate": 0.00019182169576437724, "loss": 1.6374, "step": 4349 }, { "epoch": 0.15578276362203877, "grad_norm": 1.3405836820602417, "learning_rate": 0.00019181710103526321, "loss": 1.7475, "step": 4350 }, { "epoch": 0.15581857575160707, "grad_norm": 1.6657061576843262, "learning_rate": 0.00019181250507086854, "loss": 1.4629, "step": 4351 }, { "epoch": 0.15585438788117537, "grad_norm": 1.7391798496246338, "learning_rate": 0.00019180790787125504, "loss": 1.6842, "step": 4352 }, { "epoch": 0.15589020001074363, "grad_norm": 1.8803669214248657, "learning_rate": 0.00019180330943648454, "loss": 1.7868, "step": 4353 }, { "epoch": 0.15592601214031193, "grad_norm": 1.6519395112991333, "learning_rate": 0.00019179870976661895, "loss": 1.5024, "step": 4354 }, { "epoch": 0.1559618242698802, "grad_norm": 1.9108158349990845, "learning_rate": 0.0001917941088617201, "loss": 1.5709, "step": 4355 }, { "epoch": 0.1559976363994485, "grad_norm": 1.362729787826538, "learning_rate": 0.00019178950672184996, "loss": 1.6681, "step": 4356 }, { "epoch": 0.15603344852901677, "grad_norm": 1.1817349195480347, "learning_rate": 0.0001917849033470704, "loss": 1.6116, "step": 4357 }, { "epoch": 0.15606926065858506, "grad_norm": 1.8846251964569092, "learning_rate": 0.00019178029873744335, "loss": 1.6507, "step": 4358 }, { "epoch": 0.15610507278815336, "grad_norm": 1.3459134101867676, "learning_rate": 0.00019177569289303078, "loss": 1.5352, "step": 4359 }, { "epoch": 0.15614088491772163, "grad_norm": 1.8612664937973022, "learning_rate": 0.00019177108581389462, "loss": 1.7641, "step": 4360 }, { "epoch": 0.15617669704728993, "grad_norm": 2.042421817779541, "learning_rate": 0.0001917664775000969, "loss": 1.5105, "step": 4361 }, { "epoch": 0.1562125091768582, "grad_norm": 1.3293653726577759, "learning_rate": 0.00019176186795169956, "loss": 1.5479, "step": 4362 }, { "epoch": 0.1562483213064265, "grad_norm": 1.579801321029663, "learning_rate": 0.0001917572571687647, "loss": 1.5348, "step": 4363 }, { "epoch": 0.15628413343599476, "grad_norm": 1.945090889930725, "learning_rate": 0.00019175264515135427, "loss": 1.7108, "step": 4364 }, { "epoch": 0.15631994556556306, "grad_norm": 1.8579822778701782, "learning_rate": 0.00019174803189953035, "loss": 2.0244, "step": 4365 }, { "epoch": 0.15635575769513135, "grad_norm": 1.6764092445373535, "learning_rate": 0.00019174341741335504, "loss": 1.4705, "step": 4366 }, { "epoch": 0.15639156982469962, "grad_norm": 1.4812837839126587, "learning_rate": 0.00019173880169289035, "loss": 1.6117, "step": 4367 }, { "epoch": 0.15642738195426792, "grad_norm": 1.7119643688201904, "learning_rate": 0.00019173418473819844, "loss": 1.6282, "step": 4368 }, { "epoch": 0.1564631940838362, "grad_norm": 2.8325753211975098, "learning_rate": 0.0001917295665493414, "loss": 1.4901, "step": 4369 }, { "epoch": 0.15649900621340448, "grad_norm": 1.717469334602356, "learning_rate": 0.00019172494712638136, "loss": 1.7564, "step": 4370 }, { "epoch": 0.15653481834297275, "grad_norm": 3.0501840114593506, "learning_rate": 0.0001917203264693805, "loss": 1.5696, "step": 4371 }, { "epoch": 0.15657063047254105, "grad_norm": 2.0719053745269775, "learning_rate": 0.0001917157045784009, "loss": 1.5447, "step": 4372 }, { "epoch": 0.15660644260210935, "grad_norm": 2.070021390914917, "learning_rate": 0.00019171108145350484, "loss": 1.8622, "step": 4373 }, { "epoch": 0.15664225473167762, "grad_norm": 1.451671838760376, "learning_rate": 0.00019170645709475447, "loss": 1.3056, "step": 4374 }, { "epoch": 0.1566780668612459, "grad_norm": 1.7899963855743408, "learning_rate": 0.00019170183150221201, "loss": 1.3817, "step": 4375 }, { "epoch": 0.15671387899081418, "grad_norm": 2.0643718242645264, "learning_rate": 0.00019169720467593972, "loss": 1.751, "step": 4376 }, { "epoch": 0.15674969112038248, "grad_norm": 1.62465238571167, "learning_rate": 0.0001916925766159998, "loss": 1.5946, "step": 4377 }, { "epoch": 0.15678550324995075, "grad_norm": 1.3463857173919678, "learning_rate": 0.0001916879473224545, "loss": 1.3212, "step": 4378 }, { "epoch": 0.15682131537951904, "grad_norm": 1.371046543121338, "learning_rate": 0.00019168331679536623, "loss": 1.8971, "step": 4379 }, { "epoch": 0.15685712750908734, "grad_norm": 1.3901337385177612, "learning_rate": 0.00019167868503479712, "loss": 1.6782, "step": 4380 }, { "epoch": 0.1568929396386556, "grad_norm": 1.584809422492981, "learning_rate": 0.00019167405204080956, "loss": 1.5364, "step": 4381 }, { "epoch": 0.1569287517682239, "grad_norm": 1.3930258750915527, "learning_rate": 0.00019166941781346592, "loss": 1.2805, "step": 4382 }, { "epoch": 0.15696456389779218, "grad_norm": 2.2190914154052734, "learning_rate": 0.0001916647823528285, "loss": 1.8246, "step": 4383 }, { "epoch": 0.15700037602736047, "grad_norm": 2.376316547393799, "learning_rate": 0.00019166014565895966, "loss": 1.8629, "step": 4384 }, { "epoch": 0.15703618815692874, "grad_norm": 1.8719273805618286, "learning_rate": 0.0001916555077319218, "loss": 1.642, "step": 4385 }, { "epoch": 0.15707200028649704, "grad_norm": 1.8520594835281372, "learning_rate": 0.0001916508685717773, "loss": 1.6127, "step": 4386 }, { "epoch": 0.15710781241606533, "grad_norm": 1.3201779127120972, "learning_rate": 0.0001916462281785886, "loss": 1.5689, "step": 4387 }, { "epoch": 0.1571436245456336, "grad_norm": 2.1866772174835205, "learning_rate": 0.0001916415865524181, "loss": 1.6067, "step": 4388 }, { "epoch": 0.1571794366752019, "grad_norm": 1.5497702360153198, "learning_rate": 0.00019163694369332825, "loss": 1.3363, "step": 4389 }, { "epoch": 0.15721524880477017, "grad_norm": 1.541739583015442, "learning_rate": 0.00019163229960138156, "loss": 1.3411, "step": 4390 }, { "epoch": 0.15725106093433847, "grad_norm": 1.7324460744857788, "learning_rate": 0.00019162765427664045, "loss": 1.6654, "step": 4391 }, { "epoch": 0.15728687306390673, "grad_norm": 1.6018710136413574, "learning_rate": 0.00019162300771916746, "loss": 1.6184, "step": 4392 }, { "epoch": 0.15732268519347503, "grad_norm": 1.3000634908676147, "learning_rate": 0.00019161835992902507, "loss": 1.5956, "step": 4393 }, { "epoch": 0.15735849732304333, "grad_norm": 1.8652485609054565, "learning_rate": 0.00019161371090627583, "loss": 1.5652, "step": 4394 }, { "epoch": 0.1573943094526116, "grad_norm": 1.4492467641830444, "learning_rate": 0.00019160906065098228, "loss": 1.4706, "step": 4395 }, { "epoch": 0.1574301215821799, "grad_norm": 1.5934580564498901, "learning_rate": 0.00019160440916320698, "loss": 1.6347, "step": 4396 }, { "epoch": 0.15746593371174816, "grad_norm": 1.5383509397506714, "learning_rate": 0.00019159975644301256, "loss": 1.7649, "step": 4397 }, { "epoch": 0.15750174584131646, "grad_norm": 1.6854779720306396, "learning_rate": 0.00019159510249046154, "loss": 1.6996, "step": 4398 }, { "epoch": 0.15753755797088473, "grad_norm": 1.5617847442626953, "learning_rate": 0.00019159044730561656, "loss": 1.7046, "step": 4399 }, { "epoch": 0.15757337010045303, "grad_norm": 1.5053625106811523, "learning_rate": 0.00019158579088854026, "loss": 1.6323, "step": 4400 }, { "epoch": 0.15760918223002132, "grad_norm": 1.4751994609832764, "learning_rate": 0.0001915811332392953, "loss": 1.6127, "step": 4401 }, { "epoch": 0.1576449943595896, "grad_norm": 1.9280856847763062, "learning_rate": 0.00019157647435794428, "loss": 1.2965, "step": 4402 }, { "epoch": 0.1576808064891579, "grad_norm": 2.083721160888672, "learning_rate": 0.00019157181424454996, "loss": 1.899, "step": 4403 }, { "epoch": 0.15771661861872616, "grad_norm": 1.2885043621063232, "learning_rate": 0.00019156715289917497, "loss": 1.5413, "step": 4404 }, { "epoch": 0.15775243074829445, "grad_norm": 1.6024374961853027, "learning_rate": 0.0001915624903218821, "loss": 1.4269, "step": 4405 }, { "epoch": 0.15778824287786272, "grad_norm": 2.1277174949645996, "learning_rate": 0.00019155782651273398, "loss": 1.9484, "step": 4406 }, { "epoch": 0.15782405500743102, "grad_norm": 1.5377131700515747, "learning_rate": 0.00019155316147179342, "loss": 1.5133, "step": 4407 }, { "epoch": 0.1578598671369993, "grad_norm": 1.6273584365844727, "learning_rate": 0.00019154849519912318, "loss": 1.7354, "step": 4408 }, { "epoch": 0.15789567926656758, "grad_norm": 2.1824471950531006, "learning_rate": 0.00019154382769478602, "loss": 1.6386, "step": 4409 }, { "epoch": 0.15793149139613588, "grad_norm": 1.4899530410766602, "learning_rate": 0.00019153915895884474, "loss": 1.5148, "step": 4410 }, { "epoch": 0.15796730352570415, "grad_norm": 1.584786057472229, "learning_rate": 0.00019153448899136212, "loss": 1.3899, "step": 4411 }, { "epoch": 0.15800311565527245, "grad_norm": 1.3985209465026855, "learning_rate": 0.00019152981779240106, "loss": 1.7417, "step": 4412 }, { "epoch": 0.15803892778484072, "grad_norm": 1.8065505027770996, "learning_rate": 0.00019152514536202437, "loss": 1.1059, "step": 4413 }, { "epoch": 0.158074739914409, "grad_norm": 1.7250769138336182, "learning_rate": 0.0001915204717002949, "loss": 1.819, "step": 4414 }, { "epoch": 0.15811055204397728, "grad_norm": 1.400039553642273, "learning_rate": 0.00019151579680727553, "loss": 1.586, "step": 4415 }, { "epoch": 0.15814636417354558, "grad_norm": 2.593348264694214, "learning_rate": 0.00019151112068302917, "loss": 1.9007, "step": 4416 }, { "epoch": 0.15818217630311388, "grad_norm": 1.6109260320663452, "learning_rate": 0.0001915064433276187, "loss": 1.4546, "step": 4417 }, { "epoch": 0.15821798843268214, "grad_norm": 1.6492670774459839, "learning_rate": 0.0001915017647411071, "loss": 1.5743, "step": 4418 }, { "epoch": 0.15825380056225044, "grad_norm": 2.3043372631073, "learning_rate": 0.00019149708492355728, "loss": 2.1891, "step": 4419 }, { "epoch": 0.1582896126918187, "grad_norm": 1.7343984842300415, "learning_rate": 0.0001914924038750322, "loss": 1.817, "step": 4420 }, { "epoch": 0.158325424821387, "grad_norm": 1.7777115106582642, "learning_rate": 0.00019148772159559486, "loss": 1.4785, "step": 4421 }, { "epoch": 0.15836123695095528, "grad_norm": 1.8788021802902222, "learning_rate": 0.00019148303808530818, "loss": 1.4365, "step": 4422 }, { "epoch": 0.15839704908052357, "grad_norm": 1.6759637594223022, "learning_rate": 0.00019147835334423527, "loss": 1.5425, "step": 4423 }, { "epoch": 0.15843286121009187, "grad_norm": 1.5373001098632812, "learning_rate": 0.0001914736673724391, "loss": 1.4686, "step": 4424 }, { "epoch": 0.15846867333966014, "grad_norm": 1.998205542564392, "learning_rate": 0.00019146898016998273, "loss": 1.7219, "step": 4425 }, { "epoch": 0.15850448546922843, "grad_norm": 1.5967192649841309, "learning_rate": 0.00019146429173692923, "loss": 1.4809, "step": 4426 }, { "epoch": 0.1585402975987967, "grad_norm": 1.7499881982803345, "learning_rate": 0.00019145960207334165, "loss": 1.4854, "step": 4427 }, { "epoch": 0.158576109728365, "grad_norm": 1.3718316555023193, "learning_rate": 0.00019145491117928312, "loss": 1.7586, "step": 4428 }, { "epoch": 0.15861192185793327, "grad_norm": 1.6473380327224731, "learning_rate": 0.00019145021905481673, "loss": 1.8481, "step": 4429 }, { "epoch": 0.15864773398750157, "grad_norm": 1.416988730430603, "learning_rate": 0.00019144552570000558, "loss": 1.5625, "step": 4430 }, { "epoch": 0.15868354611706986, "grad_norm": 1.5032716989517212, "learning_rate": 0.00019144083111491284, "loss": 1.511, "step": 4431 }, { "epoch": 0.15871935824663813, "grad_norm": 1.6411312818527222, "learning_rate": 0.0001914361352996017, "loss": 1.3405, "step": 4432 }, { "epoch": 0.15875517037620643, "grad_norm": 1.9214622974395752, "learning_rate": 0.00019143143825413526, "loss": 1.6389, "step": 4433 }, { "epoch": 0.1587909825057747, "grad_norm": 1.5103355646133423, "learning_rate": 0.00019142673997857678, "loss": 1.4921, "step": 4434 }, { "epoch": 0.158826794635343, "grad_norm": 1.3821022510528564, "learning_rate": 0.00019142204047298945, "loss": 1.4057, "step": 4435 }, { "epoch": 0.15886260676491126, "grad_norm": 2.0215821266174316, "learning_rate": 0.00019141733973743644, "loss": 1.7836, "step": 4436 }, { "epoch": 0.15889841889447956, "grad_norm": 1.9990155696868896, "learning_rate": 0.0001914126377719811, "loss": 1.508, "step": 4437 }, { "epoch": 0.15893423102404786, "grad_norm": 1.4154623746871948, "learning_rate": 0.00019140793457668665, "loss": 1.4155, "step": 4438 }, { "epoch": 0.15897004315361613, "grad_norm": 1.843180775642395, "learning_rate": 0.0001914032301516163, "loss": 1.6476, "step": 4439 }, { "epoch": 0.15900585528318442, "grad_norm": 1.4247946739196777, "learning_rate": 0.0001913985244968334, "loss": 1.4814, "step": 4440 }, { "epoch": 0.1590416674127527, "grad_norm": 1.6840612888336182, "learning_rate": 0.00019139381761240127, "loss": 1.5899, "step": 4441 }, { "epoch": 0.159077479542321, "grad_norm": 1.6597590446472168, "learning_rate": 0.00019138910949838321, "loss": 1.4572, "step": 4442 }, { "epoch": 0.15911329167188926, "grad_norm": 1.8821818828582764, "learning_rate": 0.0001913844001548425, "loss": 1.6773, "step": 4443 }, { "epoch": 0.15914910380145755, "grad_norm": 1.239434838294983, "learning_rate": 0.00019137968958184265, "loss": 1.5669, "step": 4444 }, { "epoch": 0.15918491593102585, "grad_norm": 1.371330738067627, "learning_rate": 0.00019137497777944691, "loss": 1.2934, "step": 4445 }, { "epoch": 0.15922072806059412, "grad_norm": 2.062408447265625, "learning_rate": 0.00019137026474771874, "loss": 1.3111, "step": 4446 }, { "epoch": 0.15925654019016242, "grad_norm": 2.3759512901306152, "learning_rate": 0.00019136555048672145, "loss": 1.4451, "step": 4447 }, { "epoch": 0.15929235231973068, "grad_norm": 2.019629716873169, "learning_rate": 0.0001913608349965186, "loss": 1.6197, "step": 4448 }, { "epoch": 0.15932816444929898, "grad_norm": 1.272171139717102, "learning_rate": 0.0001913561182771735, "loss": 1.6714, "step": 4449 }, { "epoch": 0.15936397657886725, "grad_norm": 1.7505909204483032, "learning_rate": 0.00019135140032874973, "loss": 1.5886, "step": 4450 }, { "epoch": 0.15939978870843555, "grad_norm": 1.6725510358810425, "learning_rate": 0.00019134668115131068, "loss": 1.6745, "step": 4451 }, { "epoch": 0.15943560083800384, "grad_norm": 1.7542790174484253, "learning_rate": 0.00019134196074491988, "loss": 1.584, "step": 4452 }, { "epoch": 0.1594714129675721, "grad_norm": 1.595931053161621, "learning_rate": 0.00019133723910964078, "loss": 1.5579, "step": 4453 }, { "epoch": 0.1595072250971404, "grad_norm": 2.000361204147339, "learning_rate": 0.00019133251624553696, "loss": 1.5491, "step": 4454 }, { "epoch": 0.15954303722670868, "grad_norm": 1.6233510971069336, "learning_rate": 0.00019132779215267197, "loss": 1.7638, "step": 4455 }, { "epoch": 0.15957884935627697, "grad_norm": 1.5333667993545532, "learning_rate": 0.00019132306683110933, "loss": 1.4675, "step": 4456 }, { "epoch": 0.15961466148584524, "grad_norm": 1.6372523307800293, "learning_rate": 0.0001913183402809126, "loss": 1.8686, "step": 4457 }, { "epoch": 0.15965047361541354, "grad_norm": 1.6973719596862793, "learning_rate": 0.00019131361250214541, "loss": 1.4094, "step": 4458 }, { "epoch": 0.15968628574498184, "grad_norm": 1.5486109256744385, "learning_rate": 0.00019130888349487134, "loss": 1.2735, "step": 4459 }, { "epoch": 0.1597220978745501, "grad_norm": 1.7678258419036865, "learning_rate": 0.00019130415325915406, "loss": 1.454, "step": 4460 }, { "epoch": 0.1597579100041184, "grad_norm": 1.7122492790222168, "learning_rate": 0.00019129942179505713, "loss": 1.3684, "step": 4461 }, { "epoch": 0.15979372213368667, "grad_norm": 1.4079463481903076, "learning_rate": 0.00019129468910264428, "loss": 1.6309, "step": 4462 }, { "epoch": 0.15982953426325497, "grad_norm": 2.084505319595337, "learning_rate": 0.00019128995518197912, "loss": 1.6246, "step": 4463 }, { "epoch": 0.15986534639282324, "grad_norm": 1.141055941581726, "learning_rate": 0.00019128522003312537, "loss": 1.2627, "step": 4464 }, { "epoch": 0.15990115852239153, "grad_norm": 1.7960704565048218, "learning_rate": 0.00019128048365614676, "loss": 1.5314, "step": 4465 }, { "epoch": 0.15993697065195983, "grad_norm": 1.8227843046188354, "learning_rate": 0.00019127574605110693, "loss": 1.8467, "step": 4466 }, { "epoch": 0.1599727827815281, "grad_norm": 1.508379340171814, "learning_rate": 0.00019127100721806975, "loss": 1.6528, "step": 4467 }, { "epoch": 0.1600085949110964, "grad_norm": 1.5757975578308105, "learning_rate": 0.00019126626715709885, "loss": 1.25, "step": 4468 }, { "epoch": 0.16004440704066467, "grad_norm": 1.5202853679656982, "learning_rate": 0.00019126152586825806, "loss": 1.4711, "step": 4469 }, { "epoch": 0.16008021917023296, "grad_norm": 1.781907558441162, "learning_rate": 0.00019125678335161117, "loss": 1.4699, "step": 4470 }, { "epoch": 0.16011603129980123, "grad_norm": 1.2357120513916016, "learning_rate": 0.00019125203960722198, "loss": 1.5946, "step": 4471 }, { "epoch": 0.16015184342936953, "grad_norm": 1.6788946390151978, "learning_rate": 0.00019124729463515427, "loss": 1.719, "step": 4472 }, { "epoch": 0.16018765555893782, "grad_norm": 1.5136687755584717, "learning_rate": 0.00019124254843547195, "loss": 1.5618, "step": 4473 }, { "epoch": 0.1602234676885061, "grad_norm": 1.810028314590454, "learning_rate": 0.0001912378010082388, "loss": 1.5755, "step": 4474 }, { "epoch": 0.1602592798180744, "grad_norm": 4.77049446105957, "learning_rate": 0.00019123305235351873, "loss": 1.4884, "step": 4475 }, { "epoch": 0.16029509194764266, "grad_norm": 1.4742318391799927, "learning_rate": 0.00019122830247137563, "loss": 1.3881, "step": 4476 }, { "epoch": 0.16033090407721096, "grad_norm": 2.340515375137329, "learning_rate": 0.00019122355136187342, "loss": 1.8475, "step": 4477 }, { "epoch": 0.16036671620677923, "grad_norm": 1.6513471603393555, "learning_rate": 0.00019121879902507595, "loss": 1.6164, "step": 4478 }, { "epoch": 0.16040252833634752, "grad_norm": 1.3290749788284302, "learning_rate": 0.00019121404546104724, "loss": 1.6316, "step": 4479 }, { "epoch": 0.16043834046591582, "grad_norm": 1.52192223072052, "learning_rate": 0.00019120929066985122, "loss": 1.2523, "step": 4480 }, { "epoch": 0.1604741525954841, "grad_norm": 2.044299602508545, "learning_rate": 0.0001912045346515518, "loss": 1.4638, "step": 4481 }, { "epoch": 0.16050996472505238, "grad_norm": 1.4951385259628296, "learning_rate": 0.00019119977740621305, "loss": 1.476, "step": 4482 }, { "epoch": 0.16054577685462065, "grad_norm": 1.5891962051391602, "learning_rate": 0.0001911950189338989, "loss": 1.1198, "step": 4483 }, { "epoch": 0.16058158898418895, "grad_norm": 1.8488585948944092, "learning_rate": 0.00019119025923467343, "loss": 1.5125, "step": 4484 }, { "epoch": 0.16061740111375722, "grad_norm": 2.9444382190704346, "learning_rate": 0.00019118549830860065, "loss": 1.4347, "step": 4485 }, { "epoch": 0.16065321324332552, "grad_norm": 2.6969316005706787, "learning_rate": 0.0001911807361557446, "loss": 1.4744, "step": 4486 }, { "epoch": 0.1606890253728938, "grad_norm": 1.69938063621521, "learning_rate": 0.00019117597277616932, "loss": 1.5696, "step": 4487 }, { "epoch": 0.16072483750246208, "grad_norm": 1.9011529684066772, "learning_rate": 0.00019117120816993899, "loss": 1.6207, "step": 4488 }, { "epoch": 0.16076064963203038, "grad_norm": 1.8051317930221558, "learning_rate": 0.00019116644233711764, "loss": 1.5994, "step": 4489 }, { "epoch": 0.16079646176159865, "grad_norm": 1.7188615798950195, "learning_rate": 0.0001911616752777694, "loss": 1.7194, "step": 4490 }, { "epoch": 0.16083227389116694, "grad_norm": 1.4117614030838013, "learning_rate": 0.0001911569069919584, "loss": 1.666, "step": 4491 }, { "epoch": 0.1608680860207352, "grad_norm": 1.8532236814498901, "learning_rate": 0.00019115213747974882, "loss": 1.2589, "step": 4492 }, { "epoch": 0.1609038981503035, "grad_norm": 1.4849985837936401, "learning_rate": 0.0001911473667412048, "loss": 1.5491, "step": 4493 }, { "epoch": 0.1609397102798718, "grad_norm": 1.5304160118103027, "learning_rate": 0.00019114259477639057, "loss": 1.656, "step": 4494 }, { "epoch": 0.16097552240944007, "grad_norm": 1.8665733337402344, "learning_rate": 0.00019113782158537024, "loss": 1.447, "step": 4495 }, { "epoch": 0.16101133453900837, "grad_norm": 1.4369174242019653, "learning_rate": 0.0001911330471682081, "loss": 1.3837, "step": 4496 }, { "epoch": 0.16104714666857664, "grad_norm": 1.2710061073303223, "learning_rate": 0.00019112827152496835, "loss": 1.7857, "step": 4497 }, { "epoch": 0.16108295879814494, "grad_norm": 1.0543251037597656, "learning_rate": 0.00019112349465571525, "loss": 1.6002, "step": 4498 }, { "epoch": 0.1611187709277132, "grad_norm": 1.3556406497955322, "learning_rate": 0.0001911187165605131, "loss": 1.5894, "step": 4499 }, { "epoch": 0.1611545830572815, "grad_norm": 1.9065412282943726, "learning_rate": 0.00019111393723942615, "loss": 1.5288, "step": 4500 }, { "epoch": 0.1611903951868498, "grad_norm": 1.592268705368042, "learning_rate": 0.00019110915669251868, "loss": 1.5041, "step": 4501 }, { "epoch": 0.16122620731641807, "grad_norm": 1.5915298461914062, "learning_rate": 0.00019110437491985505, "loss": 1.9453, "step": 4502 }, { "epoch": 0.16126201944598637, "grad_norm": 2.055844306945801, "learning_rate": 0.00019109959192149955, "loss": 1.405, "step": 4503 }, { "epoch": 0.16129783157555463, "grad_norm": 1.386273980140686, "learning_rate": 0.0001910948076975166, "loss": 1.3482, "step": 4504 }, { "epoch": 0.16133364370512293, "grad_norm": 1.3630104064941406, "learning_rate": 0.00019109002224797046, "loss": 1.8159, "step": 4505 }, { "epoch": 0.1613694558346912, "grad_norm": 1.701704978942871, "learning_rate": 0.00019108523557292558, "loss": 1.5629, "step": 4506 }, { "epoch": 0.1614052679642595, "grad_norm": 1.483626127243042, "learning_rate": 0.00019108044767244636, "loss": 1.6765, "step": 4507 }, { "epoch": 0.16144108009382777, "grad_norm": 1.3464930057525635, "learning_rate": 0.0001910756585465972, "loss": 1.7481, "step": 4508 }, { "epoch": 0.16147689222339606, "grad_norm": 1.5823675394058228, "learning_rate": 0.0001910708681954425, "loss": 1.5115, "step": 4509 }, { "epoch": 0.16151270435296436, "grad_norm": 1.8616117238998413, "learning_rate": 0.00019106607661904682, "loss": 1.6684, "step": 4510 }, { "epoch": 0.16154851648253263, "grad_norm": 1.4790433645248413, "learning_rate": 0.00019106128381747448, "loss": 1.528, "step": 4511 }, { "epoch": 0.16158432861210092, "grad_norm": 1.3721879720687866, "learning_rate": 0.00019105648979079006, "loss": 1.6033, "step": 4512 }, { "epoch": 0.1616201407416692, "grad_norm": 1.1864506006240845, "learning_rate": 0.000191051694539058, "loss": 1.5174, "step": 4513 }, { "epoch": 0.1616559528712375, "grad_norm": 1.2634984254837036, "learning_rate": 0.0001910468980623428, "loss": 1.5987, "step": 4514 }, { "epoch": 0.16169176500080576, "grad_norm": 2.0381622314453125, "learning_rate": 0.0001910421003607091, "loss": 1.6299, "step": 4515 }, { "epoch": 0.16172757713037406, "grad_norm": 1.4539151191711426, "learning_rate": 0.00019103730143422135, "loss": 1.8375, "step": 4516 }, { "epoch": 0.16176338925994235, "grad_norm": 1.7480626106262207, "learning_rate": 0.00019103250128294413, "loss": 1.608, "step": 4517 }, { "epoch": 0.16179920138951062, "grad_norm": 1.822675347328186, "learning_rate": 0.00019102769990694208, "loss": 1.5056, "step": 4518 }, { "epoch": 0.16183501351907892, "grad_norm": 1.5207996368408203, "learning_rate": 0.00019102289730627968, "loss": 1.5263, "step": 4519 }, { "epoch": 0.1618708256486472, "grad_norm": 2.3780176639556885, "learning_rate": 0.0001910180934810216, "loss": 1.581, "step": 4520 }, { "epoch": 0.16190663777821548, "grad_norm": 2.004570484161377, "learning_rate": 0.0001910132884312325, "loss": 1.853, "step": 4521 }, { "epoch": 0.16194244990778375, "grad_norm": 1.402849793434143, "learning_rate": 0.00019100848215697705, "loss": 1.5757, "step": 4522 }, { "epoch": 0.16197826203735205, "grad_norm": 1.324273943901062, "learning_rate": 0.00019100367465831983, "loss": 1.5214, "step": 4523 }, { "epoch": 0.16201407416692035, "grad_norm": 1.5259591341018677, "learning_rate": 0.00019099886593532554, "loss": 1.4345, "step": 4524 }, { "epoch": 0.16204988629648862, "grad_norm": 1.5737764835357666, "learning_rate": 0.00019099405598805888, "loss": 1.5467, "step": 4525 }, { "epoch": 0.1620856984260569, "grad_norm": 1.5883179903030396, "learning_rate": 0.0001909892448165846, "loss": 1.553, "step": 4526 }, { "epoch": 0.16212151055562518, "grad_norm": 1.3239035606384277, "learning_rate": 0.0001909844324209674, "loss": 1.5387, "step": 4527 }, { "epoch": 0.16215732268519348, "grad_norm": 1.957148790359497, "learning_rate": 0.00019097961880127203, "loss": 1.5983, "step": 4528 }, { "epoch": 0.16219313481476175, "grad_norm": 1.2890499830245972, "learning_rate": 0.0001909748039575632, "loss": 1.6041, "step": 4529 }, { "epoch": 0.16222894694433004, "grad_norm": 1.505007266998291, "learning_rate": 0.00019096998788990574, "loss": 1.3721, "step": 4530 }, { "epoch": 0.16226475907389834, "grad_norm": 1.8835853338241577, "learning_rate": 0.00019096517059836448, "loss": 1.8609, "step": 4531 }, { "epoch": 0.1623005712034666, "grad_norm": 1.509743571281433, "learning_rate": 0.00019096035208300416, "loss": 1.5606, "step": 4532 }, { "epoch": 0.1623363833330349, "grad_norm": 1.2899866104125977, "learning_rate": 0.00019095553234388962, "loss": 1.7222, "step": 4533 }, { "epoch": 0.16237219546260317, "grad_norm": 1.679097294807434, "learning_rate": 0.00019095071138108575, "loss": 1.5859, "step": 4534 }, { "epoch": 0.16240800759217147, "grad_norm": 1.3049614429473877, "learning_rate": 0.00019094588919465734, "loss": 1.4242, "step": 4535 }, { "epoch": 0.16244381972173974, "grad_norm": 2.140238046646118, "learning_rate": 0.0001909410657846693, "loss": 1.7717, "step": 4536 }, { "epoch": 0.16247963185130804, "grad_norm": 1.8646196126937866, "learning_rate": 0.00019093624115118656, "loss": 1.5395, "step": 4537 }, { "epoch": 0.16251544398087633, "grad_norm": 1.333450198173523, "learning_rate": 0.00019093141529427396, "loss": 1.5125, "step": 4538 }, { "epoch": 0.1625512561104446, "grad_norm": 1.132122278213501, "learning_rate": 0.00019092658821399648, "loss": 1.4367, "step": 4539 }, { "epoch": 0.1625870682400129, "grad_norm": 1.6168757677078247, "learning_rate": 0.00019092175991041905, "loss": 1.6936, "step": 4540 }, { "epoch": 0.16262288036958117, "grad_norm": 1.564820647239685, "learning_rate": 0.0001909169303836066, "loss": 1.645, "step": 4541 }, { "epoch": 0.16265869249914947, "grad_norm": 1.6114917993545532, "learning_rate": 0.00019091209963362416, "loss": 1.4573, "step": 4542 }, { "epoch": 0.16269450462871773, "grad_norm": 2.2553696632385254, "learning_rate": 0.00019090726766053667, "loss": 1.7323, "step": 4543 }, { "epoch": 0.16273031675828603, "grad_norm": 1.2172681093215942, "learning_rate": 0.00019090243446440915, "loss": 1.5023, "step": 4544 }, { "epoch": 0.16276612888785433, "grad_norm": 1.509480357170105, "learning_rate": 0.0001908976000453066, "loss": 1.6955, "step": 4545 }, { "epoch": 0.1628019410174226, "grad_norm": 1.7382971048355103, "learning_rate": 0.00019089276440329415, "loss": 1.4946, "step": 4546 }, { "epoch": 0.1628377531469909, "grad_norm": 2.00982928276062, "learning_rate": 0.00019088792753843675, "loss": 1.7347, "step": 4547 }, { "epoch": 0.16287356527655916, "grad_norm": 1.5432096719741821, "learning_rate": 0.00019088308945079956, "loss": 1.8206, "step": 4548 }, { "epoch": 0.16290937740612746, "grad_norm": 1.4766311645507812, "learning_rate": 0.00019087825014044762, "loss": 1.6214, "step": 4549 }, { "epoch": 0.16294518953569573, "grad_norm": 2.6827495098114014, "learning_rate": 0.00019087340960744604, "loss": 1.6215, "step": 4550 }, { "epoch": 0.16298100166526402, "grad_norm": 1.2720814943313599, "learning_rate": 0.00019086856785185992, "loss": 1.5734, "step": 4551 }, { "epoch": 0.16301681379483232, "grad_norm": 1.7849035263061523, "learning_rate": 0.0001908637248737545, "loss": 1.5433, "step": 4552 }, { "epoch": 0.1630526259244006, "grad_norm": 1.7423144578933716, "learning_rate": 0.00019085888067319485, "loss": 1.4252, "step": 4553 }, { "epoch": 0.1630884380539689, "grad_norm": 2.395569324493408, "learning_rate": 0.00019085403525024612, "loss": 1.5359, "step": 4554 }, { "epoch": 0.16312425018353716, "grad_norm": 1.3138221502304077, "learning_rate": 0.00019084918860497356, "loss": 1.4022, "step": 4555 }, { "epoch": 0.16316006231310545, "grad_norm": 1.5145282745361328, "learning_rate": 0.00019084434073744238, "loss": 1.108, "step": 4556 }, { "epoch": 0.16319587444267372, "grad_norm": 1.376220703125, "learning_rate": 0.00019083949164771773, "loss": 1.4966, "step": 4557 }, { "epoch": 0.16323168657224202, "grad_norm": 1.3827080726623535, "learning_rate": 0.00019083464133586492, "loss": 1.6534, "step": 4558 }, { "epoch": 0.16326749870181032, "grad_norm": 1.524684190750122, "learning_rate": 0.00019082978980194918, "loss": 1.7182, "step": 4559 }, { "epoch": 0.16330331083137858, "grad_norm": 1.8181837797164917, "learning_rate": 0.00019082493704603576, "loss": 1.631, "step": 4560 }, { "epoch": 0.16333912296094688, "grad_norm": 1.2133936882019043, "learning_rate": 0.00019082008306819001, "loss": 1.7072, "step": 4561 }, { "epoch": 0.16337493509051515, "grad_norm": 1.3571362495422363, "learning_rate": 0.00019081522786847717, "loss": 1.6741, "step": 4562 }, { "epoch": 0.16341074722008345, "grad_norm": 1.4400066137313843, "learning_rate": 0.0001908103714469626, "loss": 1.394, "step": 4563 }, { "epoch": 0.16344655934965172, "grad_norm": 2.2084951400756836, "learning_rate": 0.00019080551380371157, "loss": 1.7241, "step": 4564 }, { "epoch": 0.16348237147922, "grad_norm": 1.2983593940734863, "learning_rate": 0.0001908006549387895, "loss": 1.4786, "step": 4565 }, { "epoch": 0.1635181836087883, "grad_norm": 1.8585529327392578, "learning_rate": 0.00019079579485226176, "loss": 1.5445, "step": 4566 }, { "epoch": 0.16355399573835658, "grad_norm": 1.7613205909729004, "learning_rate": 0.0001907909335441937, "loss": 1.54, "step": 4567 }, { "epoch": 0.16358980786792487, "grad_norm": 1.85093355178833, "learning_rate": 0.00019078607101465078, "loss": 1.5251, "step": 4568 }, { "epoch": 0.16362561999749314, "grad_norm": 2.1224894523620605, "learning_rate": 0.00019078120726369834, "loss": 1.6325, "step": 4569 }, { "epoch": 0.16366143212706144, "grad_norm": 1.862607717514038, "learning_rate": 0.00019077634229140188, "loss": 1.4159, "step": 4570 }, { "epoch": 0.1636972442566297, "grad_norm": 1.5398863554000854, "learning_rate": 0.0001907714760978268, "loss": 1.6412, "step": 4571 }, { "epoch": 0.163733056386198, "grad_norm": 1.6998611688613892, "learning_rate": 0.0001907666086830386, "loss": 1.6833, "step": 4572 }, { "epoch": 0.1637688685157663, "grad_norm": 1.262229561805725, "learning_rate": 0.0001907617400471028, "loss": 1.3874, "step": 4573 }, { "epoch": 0.16380468064533457, "grad_norm": 1.8390412330627441, "learning_rate": 0.00019075687019008483, "loss": 1.7563, "step": 4574 }, { "epoch": 0.16384049277490287, "grad_norm": 1.7868350744247437, "learning_rate": 0.00019075199911205024, "loss": 1.5629, "step": 4575 }, { "epoch": 0.16387630490447114, "grad_norm": 1.949973464012146, "learning_rate": 0.00019074712681306456, "loss": 1.9253, "step": 4576 }, { "epoch": 0.16391211703403943, "grad_norm": 1.4536129236221313, "learning_rate": 0.00019074225329319337, "loss": 1.5995, "step": 4577 }, { "epoch": 0.1639479291636077, "grad_norm": 1.5427803993225098, "learning_rate": 0.00019073737855250218, "loss": 1.5241, "step": 4578 }, { "epoch": 0.163983741293176, "grad_norm": 1.3218435049057007, "learning_rate": 0.00019073250259105663, "loss": 1.6316, "step": 4579 }, { "epoch": 0.1640195534227443, "grad_norm": 1.3886061906814575, "learning_rate": 0.00019072762540892226, "loss": 1.5922, "step": 4580 }, { "epoch": 0.16405536555231257, "grad_norm": 1.7809525728225708, "learning_rate": 0.00019072274700616474, "loss": 1.951, "step": 4581 }, { "epoch": 0.16409117768188086, "grad_norm": 1.7092533111572266, "learning_rate": 0.00019071786738284968, "loss": 1.4504, "step": 4582 }, { "epoch": 0.16412698981144913, "grad_norm": 1.6752238273620605, "learning_rate": 0.00019071298653904276, "loss": 1.6263, "step": 4583 }, { "epoch": 0.16416280194101743, "grad_norm": 1.514328122138977, "learning_rate": 0.00019070810447480957, "loss": 1.6375, "step": 4584 }, { "epoch": 0.1641986140705857, "grad_norm": 1.2303587198257446, "learning_rate": 0.00019070322119021588, "loss": 1.4602, "step": 4585 }, { "epoch": 0.164234426200154, "grad_norm": 2.415630578994751, "learning_rate": 0.00019069833668532732, "loss": 1.6044, "step": 4586 }, { "epoch": 0.1642702383297223, "grad_norm": 1.291359543800354, "learning_rate": 0.00019069345096020966, "loss": 1.5712, "step": 4587 }, { "epoch": 0.16430605045929056, "grad_norm": 1.2763800621032715, "learning_rate": 0.00019068856401492857, "loss": 1.6667, "step": 4588 }, { "epoch": 0.16434186258885886, "grad_norm": 1.4778591394424438, "learning_rate": 0.00019068367584954986, "loss": 1.3605, "step": 4589 }, { "epoch": 0.16437767471842712, "grad_norm": 1.095234990119934, "learning_rate": 0.00019067878646413923, "loss": 1.5766, "step": 4590 }, { "epoch": 0.16441348684799542, "grad_norm": 1.2776097059249878, "learning_rate": 0.0001906738958587625, "loss": 1.5516, "step": 4591 }, { "epoch": 0.1644492989775637, "grad_norm": 1.524300456047058, "learning_rate": 0.00019066900403348551, "loss": 1.4556, "step": 4592 }, { "epoch": 0.164485111107132, "grad_norm": 1.618364691734314, "learning_rate": 0.000190664110988374, "loss": 1.6515, "step": 4593 }, { "epoch": 0.16452092323670028, "grad_norm": 2.83412766456604, "learning_rate": 0.00019065921672349384, "loss": 1.6118, "step": 4594 }, { "epoch": 0.16455673536626855, "grad_norm": 1.6449576616287231, "learning_rate": 0.00019065432123891083, "loss": 1.5997, "step": 4595 }, { "epoch": 0.16459254749583685, "grad_norm": 1.6248342990875244, "learning_rate": 0.00019064942453469086, "loss": 1.6638, "step": 4596 }, { "epoch": 0.16462835962540512, "grad_norm": 2.3031158447265625, "learning_rate": 0.0001906445266108998, "loss": 1.7916, "step": 4597 }, { "epoch": 0.16466417175497342, "grad_norm": 1.7779685258865356, "learning_rate": 0.0001906396274676036, "loss": 1.4638, "step": 4598 }, { "epoch": 0.16469998388454168, "grad_norm": 1.4672049283981323, "learning_rate": 0.00019063472710486814, "loss": 1.6929, "step": 4599 }, { "epoch": 0.16473579601410998, "grad_norm": 1.7978802919387817, "learning_rate": 0.0001906298255227593, "loss": 1.6518, "step": 4600 }, { "epoch": 0.16477160814367828, "grad_norm": 1.8508810997009277, "learning_rate": 0.00019062492272134307, "loss": 1.4408, "step": 4601 }, { "epoch": 0.16480742027324655, "grad_norm": 1.8320178985595703, "learning_rate": 0.0001906200187006854, "loss": 1.5152, "step": 4602 }, { "epoch": 0.16484323240281484, "grad_norm": 1.634345531463623, "learning_rate": 0.0001906151134608523, "loss": 1.6666, "step": 4603 }, { "epoch": 0.1648790445323831, "grad_norm": 1.9694124460220337, "learning_rate": 0.0001906102070019097, "loss": 1.6432, "step": 4604 }, { "epoch": 0.1649148566619514, "grad_norm": 2.6619157791137695, "learning_rate": 0.00019060529932392366, "loss": 1.4241, "step": 4605 }, { "epoch": 0.16495066879151968, "grad_norm": 2.111093282699585, "learning_rate": 0.00019060039042696016, "loss": 1.5643, "step": 4606 }, { "epoch": 0.16498648092108797, "grad_norm": 1.5327450037002563, "learning_rate": 0.00019059548031108528, "loss": 1.507, "step": 4607 }, { "epoch": 0.16502229305065624, "grad_norm": 2.1236813068389893, "learning_rate": 0.0001905905689763651, "loss": 1.3831, "step": 4608 }, { "epoch": 0.16505810518022454, "grad_norm": 1.4303845167160034, "learning_rate": 0.00019058565642286567, "loss": 1.1834, "step": 4609 }, { "epoch": 0.16509391730979284, "grad_norm": 1.145059585571289, "learning_rate": 0.00019058074265065303, "loss": 1.4526, "step": 4610 }, { "epoch": 0.1651297294393611, "grad_norm": 2.205949068069458, "learning_rate": 0.00019057582765979341, "loss": 1.4746, "step": 4611 }, { "epoch": 0.1651655415689294, "grad_norm": 1.787413477897644, "learning_rate": 0.00019057091145035281, "loss": 1.5251, "step": 4612 }, { "epoch": 0.16520135369849767, "grad_norm": 1.5059257745742798, "learning_rate": 0.00019056599402239742, "loss": 1.5406, "step": 4613 }, { "epoch": 0.16523716582806597, "grad_norm": 1.2862091064453125, "learning_rate": 0.00019056107537599343, "loss": 1.5867, "step": 4614 }, { "epoch": 0.16527297795763424, "grad_norm": 1.640185832977295, "learning_rate": 0.000190556155511207, "loss": 1.9078, "step": 4615 }, { "epoch": 0.16530879008720253, "grad_norm": 2.0387260913848877, "learning_rate": 0.00019055123442810427, "loss": 1.6649, "step": 4616 }, { "epoch": 0.16534460221677083, "grad_norm": 1.985213041305542, "learning_rate": 0.00019054631212675152, "loss": 1.2348, "step": 4617 }, { "epoch": 0.1653804143463391, "grad_norm": 1.5583789348602295, "learning_rate": 0.00019054138860721492, "loss": 1.7628, "step": 4618 }, { "epoch": 0.1654162264759074, "grad_norm": 2.322619915008545, "learning_rate": 0.00019053646386956073, "loss": 1.385, "step": 4619 }, { "epoch": 0.16545203860547567, "grad_norm": 1.4217337369918823, "learning_rate": 0.00019053153791385516, "loss": 1.5673, "step": 4620 }, { "epoch": 0.16548785073504396, "grad_norm": 2.825695276260376, "learning_rate": 0.00019052661074016458, "loss": 1.5057, "step": 4621 }, { "epoch": 0.16552366286461223, "grad_norm": 1.6821850538253784, "learning_rate": 0.0001905216823485552, "loss": 1.4592, "step": 4622 }, { "epoch": 0.16555947499418053, "grad_norm": 1.3392614126205444, "learning_rate": 0.00019051675273909336, "loss": 1.7243, "step": 4623 }, { "epoch": 0.16559528712374882, "grad_norm": 1.9273262023925781, "learning_rate": 0.00019051182191184537, "loss": 1.3487, "step": 4624 }, { "epoch": 0.1656310992533171, "grad_norm": 1.8148261308670044, "learning_rate": 0.00019050688986687754, "loss": 1.5975, "step": 4625 }, { "epoch": 0.1656669113828854, "grad_norm": 1.3820042610168457, "learning_rate": 0.00019050195660425627, "loss": 1.465, "step": 4626 }, { "epoch": 0.16570272351245366, "grad_norm": 1.627036690711975, "learning_rate": 0.00019049702212404793, "loss": 1.3036, "step": 4627 }, { "epoch": 0.16573853564202196, "grad_norm": 1.241610050201416, "learning_rate": 0.00019049208642631885, "loss": 1.6127, "step": 4628 }, { "epoch": 0.16577434777159022, "grad_norm": 1.7306376695632935, "learning_rate": 0.00019048714951113552, "loss": 1.775, "step": 4629 }, { "epoch": 0.16581015990115852, "grad_norm": 1.5658414363861084, "learning_rate": 0.00019048221137856427, "loss": 1.8022, "step": 4630 }, { "epoch": 0.16584597203072682, "grad_norm": 1.4824405908584595, "learning_rate": 0.0001904772720286716, "loss": 1.5788, "step": 4631 }, { "epoch": 0.1658817841602951, "grad_norm": 1.6523913145065308, "learning_rate": 0.00019047233146152393, "loss": 1.4036, "step": 4632 }, { "epoch": 0.16591759628986338, "grad_norm": 1.4244657754898071, "learning_rate": 0.00019046738967718778, "loss": 1.6885, "step": 4633 }, { "epoch": 0.16595340841943165, "grad_norm": 1.679327368736267, "learning_rate": 0.00019046244667572957, "loss": 1.6083, "step": 4634 }, { "epoch": 0.16598922054899995, "grad_norm": 1.6196266412734985, "learning_rate": 0.00019045750245721583, "loss": 1.3341, "step": 4635 }, { "epoch": 0.16602503267856822, "grad_norm": 2.6375415325164795, "learning_rate": 0.00019045255702171307, "loss": 1.7689, "step": 4636 }, { "epoch": 0.16606084480813652, "grad_norm": 1.9008333683013916, "learning_rate": 0.00019044761036928783, "loss": 1.6576, "step": 4637 }, { "epoch": 0.1660966569377048, "grad_norm": 2.3040616512298584, "learning_rate": 0.00019044266250000668, "loss": 1.7239, "step": 4638 }, { "epoch": 0.16613246906727308, "grad_norm": 2.4334566593170166, "learning_rate": 0.00019043771341393614, "loss": 1.7195, "step": 4639 }, { "epoch": 0.16616828119684138, "grad_norm": 1.575951337814331, "learning_rate": 0.00019043276311114283, "loss": 1.6378, "step": 4640 }, { "epoch": 0.16620409332640965, "grad_norm": 2.337109327316284, "learning_rate": 0.00019042781159169336, "loss": 1.6753, "step": 4641 }, { "epoch": 0.16623990545597794, "grad_norm": 1.561690092086792, "learning_rate": 0.00019042285885565433, "loss": 1.6058, "step": 4642 }, { "epoch": 0.1662757175855462, "grad_norm": 1.7889922857284546, "learning_rate": 0.00019041790490309233, "loss": 1.1429, "step": 4643 }, { "epoch": 0.1663115297151145, "grad_norm": 1.3240993022918701, "learning_rate": 0.00019041294973407412, "loss": 1.641, "step": 4644 }, { "epoch": 0.1663473418446828, "grad_norm": 2.112696409225464, "learning_rate": 0.00019040799334866626, "loss": 1.8524, "step": 4645 }, { "epoch": 0.16638315397425107, "grad_norm": 1.8621598482131958, "learning_rate": 0.00019040303574693545, "loss": 1.724, "step": 4646 }, { "epoch": 0.16641896610381937, "grad_norm": 1.5603737831115723, "learning_rate": 0.00019039807692894842, "loss": 1.4181, "step": 4647 }, { "epoch": 0.16645477823338764, "grad_norm": 1.5125117301940918, "learning_rate": 0.00019039311689477185, "loss": 1.628, "step": 4648 }, { "epoch": 0.16649059036295594, "grad_norm": 1.4379773139953613, "learning_rate": 0.0001903881556444725, "loss": 1.5146, "step": 4649 }, { "epoch": 0.1665264024925242, "grad_norm": 2.8193790912628174, "learning_rate": 0.00019038319317811714, "loss": 1.4629, "step": 4650 }, { "epoch": 0.1665622146220925, "grad_norm": 1.4412678480148315, "learning_rate": 0.00019037822949577248, "loss": 1.2369, "step": 4651 }, { "epoch": 0.1665980267516608, "grad_norm": 1.3727669715881348, "learning_rate": 0.00019037326459750534, "loss": 1.6593, "step": 4652 }, { "epoch": 0.16663383888122907, "grad_norm": 1.2576712369918823, "learning_rate": 0.00019036829848338246, "loss": 1.4383, "step": 4653 }, { "epoch": 0.16666965101079736, "grad_norm": 1.55268132686615, "learning_rate": 0.00019036333115347073, "loss": 1.6322, "step": 4654 }, { "epoch": 0.16670546314036563, "grad_norm": 1.6275209188461304, "learning_rate": 0.00019035836260783691, "loss": 1.4125, "step": 4655 }, { "epoch": 0.16674127526993393, "grad_norm": 1.3718266487121582, "learning_rate": 0.00019035339284654787, "loss": 1.3941, "step": 4656 }, { "epoch": 0.1667770873995022, "grad_norm": 1.859714388847351, "learning_rate": 0.0001903484218696705, "loss": 1.5406, "step": 4657 }, { "epoch": 0.1668128995290705, "grad_norm": 1.5116417407989502, "learning_rate": 0.00019034344967727165, "loss": 1.6361, "step": 4658 }, { "epoch": 0.1668487116586388, "grad_norm": 1.9010447263717651, "learning_rate": 0.0001903384762694182, "loss": 1.7493, "step": 4659 }, { "epoch": 0.16688452378820706, "grad_norm": 1.5057936906814575, "learning_rate": 0.0001903335016461771, "loss": 1.3977, "step": 4660 }, { "epoch": 0.16692033591777536, "grad_norm": 1.8938969373703003, "learning_rate": 0.00019032852580761527, "loss": 1.6976, "step": 4661 }, { "epoch": 0.16695614804734363, "grad_norm": 1.1892553567886353, "learning_rate": 0.00019032354875379962, "loss": 1.2591, "step": 4662 }, { "epoch": 0.16699196017691192, "grad_norm": 1.9757999181747437, "learning_rate": 0.00019031857048479713, "loss": 1.305, "step": 4663 }, { "epoch": 0.1670277723064802, "grad_norm": 1.8454691171646118, "learning_rate": 0.00019031359100067478, "loss": 1.7795, "step": 4664 }, { "epoch": 0.1670635844360485, "grad_norm": 2.3742315769195557, "learning_rate": 0.00019030861030149956, "loss": 1.4219, "step": 4665 }, { "epoch": 0.1670993965656168, "grad_norm": 1.6165446043014526, "learning_rate": 0.00019030362838733846, "loss": 1.3658, "step": 4666 }, { "epoch": 0.16713520869518506, "grad_norm": 1.2845289707183838, "learning_rate": 0.00019029864525825857, "loss": 1.5783, "step": 4667 }, { "epoch": 0.16717102082475335, "grad_norm": 1.3073277473449707, "learning_rate": 0.00019029366091432684, "loss": 1.6852, "step": 4668 }, { "epoch": 0.16720683295432162, "grad_norm": 1.8444633483886719, "learning_rate": 0.0001902886753556104, "loss": 1.3777, "step": 4669 }, { "epoch": 0.16724264508388992, "grad_norm": 1.9535983800888062, "learning_rate": 0.0001902836885821763, "loss": 1.5784, "step": 4670 }, { "epoch": 0.1672784572134582, "grad_norm": 1.5261894464492798, "learning_rate": 0.00019027870059409158, "loss": 1.3801, "step": 4671 }, { "epoch": 0.16731426934302648, "grad_norm": 1.40703284740448, "learning_rate": 0.00019027371139142342, "loss": 1.4457, "step": 4672 }, { "epoch": 0.16735008147259478, "grad_norm": 1.388258695602417, "learning_rate": 0.00019026872097423894, "loss": 1.3877, "step": 4673 }, { "epoch": 0.16738589360216305, "grad_norm": 1.8786894083023071, "learning_rate": 0.00019026372934260525, "loss": 1.318, "step": 4674 }, { "epoch": 0.16742170573173135, "grad_norm": 1.9680266380310059, "learning_rate": 0.0001902587364965895, "loss": 1.36, "step": 4675 }, { "epoch": 0.16745751786129962, "grad_norm": 1.4114983081817627, "learning_rate": 0.00019025374243625888, "loss": 1.7629, "step": 4676 }, { "epoch": 0.1674933299908679, "grad_norm": 1.4164817333221436, "learning_rate": 0.0001902487471616806, "loss": 1.935, "step": 4677 }, { "epoch": 0.16752914212043618, "grad_norm": 1.973514437675476, "learning_rate": 0.00019024375067292181, "loss": 1.3236, "step": 4678 }, { "epoch": 0.16756495425000448, "grad_norm": 1.940759301185608, "learning_rate": 0.00019023875297004977, "loss": 1.7045, "step": 4679 }, { "epoch": 0.16760076637957277, "grad_norm": 1.5399664640426636, "learning_rate": 0.0001902337540531317, "loss": 1.8139, "step": 4680 }, { "epoch": 0.16763657850914104, "grad_norm": 1.957043170928955, "learning_rate": 0.00019022875392223486, "loss": 1.6672, "step": 4681 }, { "epoch": 0.16767239063870934, "grad_norm": 1.677278757095337, "learning_rate": 0.00019022375257742656, "loss": 1.8589, "step": 4682 }, { "epoch": 0.1677082027682776, "grad_norm": 1.3992509841918945, "learning_rate": 0.000190218750018774, "loss": 1.3775, "step": 4683 }, { "epoch": 0.1677440148978459, "grad_norm": 3.1266071796417236, "learning_rate": 0.00019021374624634456, "loss": 1.5442, "step": 4684 }, { "epoch": 0.16777982702741417, "grad_norm": 1.9289811849594116, "learning_rate": 0.0001902087412602055, "loss": 1.4118, "step": 4685 }, { "epoch": 0.16781563915698247, "grad_norm": 1.3202590942382812, "learning_rate": 0.00019020373506042424, "loss": 1.3978, "step": 4686 }, { "epoch": 0.16785145128655077, "grad_norm": 1.4680845737457275, "learning_rate": 0.00019019872764706804, "loss": 1.8426, "step": 4687 }, { "epoch": 0.16788726341611904, "grad_norm": 1.5509357452392578, "learning_rate": 0.00019019371902020434, "loss": 1.6945, "step": 4688 }, { "epoch": 0.16792307554568733, "grad_norm": 2.0995917320251465, "learning_rate": 0.00019018870917990045, "loss": 1.9977, "step": 4689 }, { "epoch": 0.1679588876752556, "grad_norm": 1.4658147096633911, "learning_rate": 0.00019018369812622384, "loss": 1.7161, "step": 4690 }, { "epoch": 0.1679946998048239, "grad_norm": 1.6705026626586914, "learning_rate": 0.0001901786858592419, "loss": 1.464, "step": 4691 }, { "epoch": 0.16803051193439217, "grad_norm": 2.0256388187408447, "learning_rate": 0.00019017367237902206, "loss": 1.2567, "step": 4692 }, { "epoch": 0.16806632406396046, "grad_norm": 1.6162030696868896, "learning_rate": 0.00019016865768563176, "loss": 1.7855, "step": 4693 }, { "epoch": 0.16810213619352876, "grad_norm": 1.8909845352172852, "learning_rate": 0.0001901636417791385, "loss": 1.7189, "step": 4694 }, { "epoch": 0.16813794832309703, "grad_norm": 1.6075421571731567, "learning_rate": 0.00019015862465960974, "loss": 1.7273, "step": 4695 }, { "epoch": 0.16817376045266533, "grad_norm": 1.5975663661956787, "learning_rate": 0.00019015360632711298, "loss": 1.8074, "step": 4696 }, { "epoch": 0.1682095725822336, "grad_norm": 1.947001576423645, "learning_rate": 0.00019014858678171573, "loss": 1.5853, "step": 4697 }, { "epoch": 0.1682453847118019, "grad_norm": 2.0930190086364746, "learning_rate": 0.0001901435660234855, "loss": 1.6109, "step": 4698 }, { "epoch": 0.16828119684137016, "grad_norm": 1.5381509065628052, "learning_rate": 0.00019013854405248992, "loss": 1.6881, "step": 4699 }, { "epoch": 0.16831700897093846, "grad_norm": 1.4447983503341675, "learning_rate": 0.0001901335208687965, "loss": 1.5999, "step": 4700 }, { "epoch": 0.16835282110050676, "grad_norm": 1.7236464023590088, "learning_rate": 0.00019012849647247277, "loss": 1.3609, "step": 4701 }, { "epoch": 0.16838863323007502, "grad_norm": 1.4596633911132812, "learning_rate": 0.0001901234708635864, "loss": 1.5683, "step": 4702 }, { "epoch": 0.16842444535964332, "grad_norm": 1.497183084487915, "learning_rate": 0.00019011844404220497, "loss": 1.502, "step": 4703 }, { "epoch": 0.1684602574892116, "grad_norm": 1.4851388931274414, "learning_rate": 0.00019011341600839616, "loss": 1.4912, "step": 4704 }, { "epoch": 0.1684960696187799, "grad_norm": 1.5417276620864868, "learning_rate": 0.00019010838676222755, "loss": 1.7476, "step": 4705 }, { "epoch": 0.16853188174834816, "grad_norm": 2.275900363922119, "learning_rate": 0.00019010335630376682, "loss": 1.2967, "step": 4706 }, { "epoch": 0.16856769387791645, "grad_norm": 1.881659984588623, "learning_rate": 0.00019009832463308168, "loss": 1.9507, "step": 4707 }, { "epoch": 0.16860350600748472, "grad_norm": 2.0647614002227783, "learning_rate": 0.00019009329175023978, "loss": 1.6887, "step": 4708 }, { "epoch": 0.16863931813705302, "grad_norm": 1.464247226715088, "learning_rate": 0.00019008825765530886, "loss": 1.6486, "step": 4709 }, { "epoch": 0.16867513026662131, "grad_norm": 1.4748529195785522, "learning_rate": 0.00019008322234835662, "loss": 1.5222, "step": 4710 }, { "epoch": 0.16871094239618958, "grad_norm": 1.4888544082641602, "learning_rate": 0.00019007818582945086, "loss": 1.3543, "step": 4711 }, { "epoch": 0.16874675452575788, "grad_norm": 1.683009386062622, "learning_rate": 0.00019007314809865928, "loss": 1.7678, "step": 4712 }, { "epoch": 0.16878256665532615, "grad_norm": 2.0621697902679443, "learning_rate": 0.00019006810915604967, "loss": 1.7165, "step": 4713 }, { "epoch": 0.16881837878489445, "grad_norm": 2.0385336875915527, "learning_rate": 0.00019006306900168983, "loss": 1.6773, "step": 4714 }, { "epoch": 0.16885419091446272, "grad_norm": 1.4429692029953003, "learning_rate": 0.00019005802763564757, "loss": 1.3534, "step": 4715 }, { "epoch": 0.168890003044031, "grad_norm": 1.8206114768981934, "learning_rate": 0.00019005298505799073, "loss": 1.6491, "step": 4716 }, { "epoch": 0.1689258151735993, "grad_norm": 1.4741144180297852, "learning_rate": 0.00019004794126878713, "loss": 1.8381, "step": 4717 }, { "epoch": 0.16896162730316758, "grad_norm": 1.921205759048462, "learning_rate": 0.00019004289626810462, "loss": 1.6051, "step": 4718 }, { "epoch": 0.16899743943273587, "grad_norm": 1.435024619102478, "learning_rate": 0.00019003785005601112, "loss": 1.5402, "step": 4719 }, { "epoch": 0.16903325156230414, "grad_norm": 1.4565465450286865, "learning_rate": 0.00019003280263257447, "loss": 1.6903, "step": 4720 }, { "epoch": 0.16906906369187244, "grad_norm": 1.364829182624817, "learning_rate": 0.0001900277539978626, "loss": 1.5461, "step": 4721 }, { "epoch": 0.1691048758214407, "grad_norm": 1.5410417318344116, "learning_rate": 0.0001900227041519434, "loss": 1.4193, "step": 4722 }, { "epoch": 0.169140687951009, "grad_norm": 1.8465443849563599, "learning_rate": 0.00019001765309488487, "loss": 1.8538, "step": 4723 }, { "epoch": 0.1691765000805773, "grad_norm": 1.8663067817687988, "learning_rate": 0.00019001260082675492, "loss": 1.8133, "step": 4724 }, { "epoch": 0.16921231221014557, "grad_norm": 1.7289997339248657, "learning_rate": 0.00019000754734762153, "loss": 1.6781, "step": 4725 }, { "epoch": 0.16924812433971387, "grad_norm": 1.981075644493103, "learning_rate": 0.0001900024926575527, "loss": 1.5013, "step": 4726 }, { "epoch": 0.16928393646928214, "grad_norm": 1.7916004657745361, "learning_rate": 0.0001899974367566164, "loss": 1.5879, "step": 4727 }, { "epoch": 0.16931974859885043, "grad_norm": 2.0685083866119385, "learning_rate": 0.00018999237964488074, "loss": 1.428, "step": 4728 }, { "epoch": 0.1693555607284187, "grad_norm": 1.6859298944473267, "learning_rate": 0.0001899873213224136, "loss": 1.5873, "step": 4729 }, { "epoch": 0.169391372857987, "grad_norm": 1.8945560455322266, "learning_rate": 0.0001899822617892832, "loss": 1.8325, "step": 4730 }, { "epoch": 0.1694271849875553, "grad_norm": 1.4337220191955566, "learning_rate": 0.0001899772010455575, "loss": 1.5093, "step": 4731 }, { "epoch": 0.16946299711712356, "grad_norm": 1.2983472347259521, "learning_rate": 0.00018997213909130464, "loss": 1.2281, "step": 4732 }, { "epoch": 0.16949880924669186, "grad_norm": 2.4921884536743164, "learning_rate": 0.0001899670759265927, "loss": 1.8038, "step": 4733 }, { "epoch": 0.16953462137626013, "grad_norm": 2.076310634613037, "learning_rate": 0.00018996201155148983, "loss": 1.4219, "step": 4734 }, { "epoch": 0.16957043350582843, "grad_norm": 1.6718064546585083, "learning_rate": 0.0001899569459660641, "loss": 1.8481, "step": 4735 }, { "epoch": 0.1696062456353967, "grad_norm": 1.3863492012023926, "learning_rate": 0.0001899518791703837, "loss": 1.4694, "step": 4736 }, { "epoch": 0.169642057764965, "grad_norm": 1.388734221458435, "learning_rate": 0.0001899468111645168, "loss": 1.6998, "step": 4737 }, { "epoch": 0.1696778698945333, "grad_norm": 2.2914044857025146, "learning_rate": 0.00018994174194853161, "loss": 1.5079, "step": 4738 }, { "epoch": 0.16971368202410156, "grad_norm": 1.6915277242660522, "learning_rate": 0.00018993667152249625, "loss": 1.5748, "step": 4739 }, { "epoch": 0.16974949415366986, "grad_norm": 1.5572466850280762, "learning_rate": 0.00018993159988647901, "loss": 1.8074, "step": 4740 }, { "epoch": 0.16978530628323812, "grad_norm": 1.6799663305282593, "learning_rate": 0.0001899265270405481, "loss": 1.6571, "step": 4741 }, { "epoch": 0.16982111841280642, "grad_norm": 1.6444222927093506, "learning_rate": 0.00018992145298477175, "loss": 1.4765, "step": 4742 }, { "epoch": 0.1698569305423747, "grad_norm": 2.0587503910064697, "learning_rate": 0.00018991637771921825, "loss": 2.0098, "step": 4743 }, { "epoch": 0.169892742671943, "grad_norm": 1.695487141609192, "learning_rate": 0.00018991130124395585, "loss": 1.8735, "step": 4744 }, { "epoch": 0.16992855480151128, "grad_norm": 1.8474738597869873, "learning_rate": 0.0001899062235590529, "loss": 1.5526, "step": 4745 }, { "epoch": 0.16996436693107955, "grad_norm": 1.5542140007019043, "learning_rate": 0.00018990114466457768, "loss": 1.7612, "step": 4746 }, { "epoch": 0.17000017906064785, "grad_norm": 1.6513017416000366, "learning_rate": 0.0001898960645605985, "loss": 1.5931, "step": 4747 }, { "epoch": 0.17003599119021612, "grad_norm": 2.3618006706237793, "learning_rate": 0.00018989098324718375, "loss": 1.5834, "step": 4748 }, { "epoch": 0.17007180331978441, "grad_norm": 1.3786261081695557, "learning_rate": 0.00018988590072440176, "loss": 1.6837, "step": 4749 }, { "epoch": 0.17010761544935268, "grad_norm": 1.8375169038772583, "learning_rate": 0.00018988081699232095, "loss": 1.726, "step": 4750 }, { "epoch": 0.17014342757892098, "grad_norm": 1.6182897090911865, "learning_rate": 0.00018987573205100965, "loss": 1.6424, "step": 4751 }, { "epoch": 0.17017923970848928, "grad_norm": 1.8573373556137085, "learning_rate": 0.00018987064590053634, "loss": 2.111, "step": 4752 }, { "epoch": 0.17021505183805755, "grad_norm": 1.8004847764968872, "learning_rate": 0.0001898655585409694, "loss": 1.5171, "step": 4753 }, { "epoch": 0.17025086396762584, "grad_norm": 1.9292305707931519, "learning_rate": 0.00018986046997237726, "loss": 1.4811, "step": 4754 }, { "epoch": 0.1702866760971941, "grad_norm": 1.3681687116622925, "learning_rate": 0.00018985538019482842, "loss": 1.6534, "step": 4755 }, { "epoch": 0.1703224882267624, "grad_norm": 2.1036245822906494, "learning_rate": 0.00018985028920839137, "loss": 1.6907, "step": 4756 }, { "epoch": 0.17035830035633068, "grad_norm": 1.4106781482696533, "learning_rate": 0.00018984519701313455, "loss": 1.5812, "step": 4757 }, { "epoch": 0.17039411248589897, "grad_norm": 1.4277902841567993, "learning_rate": 0.0001898401036091265, "loss": 1.5905, "step": 4758 }, { "epoch": 0.17042992461546727, "grad_norm": 1.5859612226486206, "learning_rate": 0.00018983500899643577, "loss": 1.9137, "step": 4759 }, { "epoch": 0.17046573674503554, "grad_norm": 1.3639755249023438, "learning_rate": 0.00018982991317513084, "loss": 1.4867, "step": 4760 }, { "epoch": 0.17050154887460384, "grad_norm": 2.493811845779419, "learning_rate": 0.0001898248161452803, "loss": 1.4949, "step": 4761 }, { "epoch": 0.1705373610041721, "grad_norm": 1.4881577491760254, "learning_rate": 0.00018981971790695275, "loss": 1.5378, "step": 4762 }, { "epoch": 0.1705731731337404, "grad_norm": 1.687936544418335, "learning_rate": 0.0001898146184602167, "loss": 1.6787, "step": 4763 }, { "epoch": 0.17060898526330867, "grad_norm": 1.3773003816604614, "learning_rate": 0.0001898095178051409, "loss": 1.4484, "step": 4764 }, { "epoch": 0.17064479739287697, "grad_norm": 1.2580550909042358, "learning_rate": 0.0001898044159417938, "loss": 1.5878, "step": 4765 }, { "epoch": 0.17068060952244526, "grad_norm": 1.4113050699234009, "learning_rate": 0.00018979931287024416, "loss": 1.2658, "step": 4766 }, { "epoch": 0.17071642165201353, "grad_norm": 2.062539577484131, "learning_rate": 0.00018979420859056062, "loss": 1.7766, "step": 4767 }, { "epoch": 0.17075223378158183, "grad_norm": 2.4932713508605957, "learning_rate": 0.0001897891031028118, "loss": 1.9452, "step": 4768 }, { "epoch": 0.1707880459111501, "grad_norm": 1.9760795831680298, "learning_rate": 0.0001897839964070664, "loss": 1.4249, "step": 4769 }, { "epoch": 0.1708238580407184, "grad_norm": 2.0726683139801025, "learning_rate": 0.00018977888850339319, "loss": 1.7297, "step": 4770 }, { "epoch": 0.17085967017028666, "grad_norm": 1.313335657119751, "learning_rate": 0.0001897737793918608, "loss": 1.5473, "step": 4771 }, { "epoch": 0.17089548229985496, "grad_norm": 1.480515956878662, "learning_rate": 0.00018976866907253803, "loss": 1.2051, "step": 4772 }, { "epoch": 0.17093129442942326, "grad_norm": 1.6562939882278442, "learning_rate": 0.0001897635575454936, "loss": 1.6465, "step": 4773 }, { "epoch": 0.17096710655899153, "grad_norm": 1.5738717317581177, "learning_rate": 0.0001897584448107963, "loss": 1.7648, "step": 4774 }, { "epoch": 0.17100291868855982, "grad_norm": 1.9761050939559937, "learning_rate": 0.0001897533308685149, "loss": 1.6079, "step": 4775 }, { "epoch": 0.1710387308181281, "grad_norm": 1.6524931192398071, "learning_rate": 0.00018974821571871822, "loss": 1.5807, "step": 4776 }, { "epoch": 0.1710745429476964, "grad_norm": 1.2474535703659058, "learning_rate": 0.00018974309936147502, "loss": 1.3729, "step": 4777 }, { "epoch": 0.17111035507726466, "grad_norm": 1.4474204778671265, "learning_rate": 0.00018973798179685423, "loss": 1.4076, "step": 4778 }, { "epoch": 0.17114616720683296, "grad_norm": 1.3599224090576172, "learning_rate": 0.0001897328630249246, "loss": 1.5103, "step": 4779 }, { "epoch": 0.17118197933640125, "grad_norm": 1.535048484802246, "learning_rate": 0.0001897277430457551, "loss": 1.3841, "step": 4780 }, { "epoch": 0.17121779146596952, "grad_norm": 1.823050856590271, "learning_rate": 0.00018972262185941452, "loss": 1.7171, "step": 4781 }, { "epoch": 0.17125360359553782, "grad_norm": 2.32497239112854, "learning_rate": 0.00018971749946597178, "loss": 1.4991, "step": 4782 }, { "epoch": 0.1712894157251061, "grad_norm": 1.427659511566162, "learning_rate": 0.00018971237586549587, "loss": 1.5099, "step": 4783 }, { "epoch": 0.17132522785467438, "grad_norm": 1.4457180500030518, "learning_rate": 0.00018970725105805562, "loss": 1.5058, "step": 4784 }, { "epoch": 0.17136103998424265, "grad_norm": 1.7552998065948486, "learning_rate": 0.00018970212504372004, "loss": 1.5277, "step": 4785 }, { "epoch": 0.17139685211381095, "grad_norm": 1.6383708715438843, "learning_rate": 0.00018969699782255808, "loss": 1.8856, "step": 4786 }, { "epoch": 0.17143266424337925, "grad_norm": 1.7544535398483276, "learning_rate": 0.0001896918693946387, "loss": 1.4428, "step": 4787 }, { "epoch": 0.17146847637294751, "grad_norm": 1.2920863628387451, "learning_rate": 0.0001896867397600309, "loss": 1.7034, "step": 4788 }, { "epoch": 0.1715042885025158, "grad_norm": 2.2302498817443848, "learning_rate": 0.00018968160891880373, "loss": 1.9788, "step": 4789 }, { "epoch": 0.17154010063208408, "grad_norm": 1.1154465675354004, "learning_rate": 0.00018967647687102618, "loss": 1.2938, "step": 4790 }, { "epoch": 0.17157591276165238, "grad_norm": 2.1247408390045166, "learning_rate": 0.00018967134361676732, "loss": 1.5288, "step": 4791 }, { "epoch": 0.17161172489122065, "grad_norm": 1.328395128250122, "learning_rate": 0.00018966620915609618, "loss": 1.5552, "step": 4792 }, { "epoch": 0.17164753702078894, "grad_norm": 2.552074432373047, "learning_rate": 0.00018966107348908188, "loss": 1.4236, "step": 4793 }, { "epoch": 0.17168334915035724, "grad_norm": 1.6310802698135376, "learning_rate": 0.00018965593661579347, "loss": 1.6218, "step": 4794 }, { "epoch": 0.1717191612799255, "grad_norm": 1.940028429031372, "learning_rate": 0.00018965079853630007, "loss": 1.7298, "step": 4795 }, { "epoch": 0.1717549734094938, "grad_norm": 1.7597527503967285, "learning_rate": 0.00018964565925067085, "loss": 1.7404, "step": 4796 }, { "epoch": 0.17179078553906207, "grad_norm": 1.7288353443145752, "learning_rate": 0.0001896405187589749, "loss": 1.4283, "step": 4797 }, { "epoch": 0.17182659766863037, "grad_norm": 1.8926104307174683, "learning_rate": 0.0001896353770612814, "loss": 1.7908, "step": 4798 }, { "epoch": 0.17186240979819864, "grad_norm": 2.0064892768859863, "learning_rate": 0.00018963023415765956, "loss": 1.5197, "step": 4799 }, { "epoch": 0.17189822192776694, "grad_norm": 1.9775582551956177, "learning_rate": 0.00018962509004817846, "loss": 1.7128, "step": 4800 }, { "epoch": 0.17193403405733523, "grad_norm": 1.5334006547927856, "learning_rate": 0.00018961994473290744, "loss": 1.5295, "step": 4801 }, { "epoch": 0.1719698461869035, "grad_norm": 1.7124851942062378, "learning_rate": 0.00018961479821191562, "loss": 1.5951, "step": 4802 }, { "epoch": 0.1720056583164718, "grad_norm": 1.847869634628296, "learning_rate": 0.00018960965048527232, "loss": 1.6884, "step": 4803 }, { "epoch": 0.17204147044604007, "grad_norm": 1.9723440408706665, "learning_rate": 0.00018960450155304677, "loss": 1.4846, "step": 4804 }, { "epoch": 0.17207728257560836, "grad_norm": 1.7727763652801514, "learning_rate": 0.00018959935141530821, "loss": 1.5497, "step": 4805 }, { "epoch": 0.17211309470517663, "grad_norm": 1.8374532461166382, "learning_rate": 0.00018959420007212593, "loss": 1.2231, "step": 4806 }, { "epoch": 0.17214890683474493, "grad_norm": 1.577351450920105, "learning_rate": 0.0001895890475235693, "loss": 1.5661, "step": 4807 }, { "epoch": 0.1721847189643132, "grad_norm": 1.4347150325775146, "learning_rate": 0.00018958389376970758, "loss": 1.644, "step": 4808 }, { "epoch": 0.1722205310938815, "grad_norm": 1.2892824411392212, "learning_rate": 0.00018957873881061014, "loss": 1.407, "step": 4809 }, { "epoch": 0.1722563432234498, "grad_norm": 1.7509137392044067, "learning_rate": 0.00018957358264634627, "loss": 1.8629, "step": 4810 }, { "epoch": 0.17229215535301806, "grad_norm": 1.6735416650772095, "learning_rate": 0.0001895684252769854, "loss": 1.3715, "step": 4811 }, { "epoch": 0.17232796748258636, "grad_norm": 1.627367615699768, "learning_rate": 0.00018956326670259695, "loss": 1.3836, "step": 4812 }, { "epoch": 0.17236377961215463, "grad_norm": 1.8383601903915405, "learning_rate": 0.00018955810692325025, "loss": 1.6612, "step": 4813 }, { "epoch": 0.17239959174172292, "grad_norm": 1.8072162866592407, "learning_rate": 0.00018955294593901476, "loss": 1.7922, "step": 4814 }, { "epoch": 0.1724354038712912, "grad_norm": 1.667475938796997, "learning_rate": 0.00018954778374995988, "loss": 1.4375, "step": 4815 }, { "epoch": 0.1724712160008595, "grad_norm": 1.460464358329773, "learning_rate": 0.00018954262035615505, "loss": 1.8729, "step": 4816 }, { "epoch": 0.1725070281304278, "grad_norm": 1.6498515605926514, "learning_rate": 0.0001895374557576698, "loss": 1.7033, "step": 4817 }, { "epoch": 0.17254284025999606, "grad_norm": 1.6040598154067993, "learning_rate": 0.00018953228995457355, "loss": 1.5721, "step": 4818 }, { "epoch": 0.17257865238956435, "grad_norm": 1.5885403156280518, "learning_rate": 0.00018952712294693585, "loss": 1.6447, "step": 4819 }, { "epoch": 0.17261446451913262, "grad_norm": 1.9566494226455688, "learning_rate": 0.0001895219547348262, "loss": 1.316, "step": 4820 }, { "epoch": 0.17265027664870092, "grad_norm": 2.1236560344696045, "learning_rate": 0.0001895167853183141, "loss": 1.8277, "step": 4821 }, { "epoch": 0.1726860887782692, "grad_norm": 2.2870054244995117, "learning_rate": 0.00018951161469746915, "loss": 1.4881, "step": 4822 }, { "epoch": 0.17272190090783748, "grad_norm": 1.455335259437561, "learning_rate": 0.00018950644287236084, "loss": 1.8649, "step": 4823 }, { "epoch": 0.17275771303740578, "grad_norm": 1.4912785291671753, "learning_rate": 0.00018950126984305885, "loss": 1.4861, "step": 4824 }, { "epoch": 0.17279352516697405, "grad_norm": 1.774718165397644, "learning_rate": 0.0001894960956096327, "loss": 1.4081, "step": 4825 }, { "epoch": 0.17282933729654235, "grad_norm": 1.8963184356689453, "learning_rate": 0.000189490920172152, "loss": 1.7869, "step": 4826 }, { "epoch": 0.17286514942611061, "grad_norm": 1.4983773231506348, "learning_rate": 0.00018948574353068643, "loss": 1.7589, "step": 4827 }, { "epoch": 0.1729009615556789, "grad_norm": 1.7496103048324585, "learning_rate": 0.0001894805656853056, "loss": 1.6636, "step": 4828 }, { "epoch": 0.17293677368524718, "grad_norm": 1.442915678024292, "learning_rate": 0.00018947538663607918, "loss": 1.2341, "step": 4829 }, { "epoch": 0.17297258581481548, "grad_norm": 2.0862679481506348, "learning_rate": 0.00018947020638307687, "loss": 1.6988, "step": 4830 }, { "epoch": 0.17300839794438377, "grad_norm": 1.8809422254562378, "learning_rate": 0.0001894650249263683, "loss": 1.7472, "step": 4831 }, { "epoch": 0.17304421007395204, "grad_norm": 1.4286421537399292, "learning_rate": 0.00018945984226602326, "loss": 1.7746, "step": 4832 }, { "epoch": 0.17308002220352034, "grad_norm": 1.5385067462921143, "learning_rate": 0.00018945465840211143, "loss": 1.7762, "step": 4833 }, { "epoch": 0.1731158343330886, "grad_norm": 1.8565199375152588, "learning_rate": 0.00018944947333470252, "loss": 1.6201, "step": 4834 }, { "epoch": 0.1731516464626569, "grad_norm": 1.7545292377471924, "learning_rate": 0.0001894442870638664, "loss": 1.468, "step": 4835 }, { "epoch": 0.17318745859222517, "grad_norm": 2.246614456176758, "learning_rate": 0.00018943909958967273, "loss": 1.7679, "step": 4836 }, { "epoch": 0.17322327072179347, "grad_norm": 1.7241929769515991, "learning_rate": 0.00018943391091219133, "loss": 1.6106, "step": 4837 }, { "epoch": 0.17325908285136177, "grad_norm": 1.5218449831008911, "learning_rate": 0.00018942872103149206, "loss": 1.7442, "step": 4838 }, { "epoch": 0.17329489498093004, "grad_norm": 1.9982221126556396, "learning_rate": 0.00018942352994764464, "loss": 1.6432, "step": 4839 }, { "epoch": 0.17333070711049833, "grad_norm": 2.3945224285125732, "learning_rate": 0.00018941833766071903, "loss": 1.7619, "step": 4840 }, { "epoch": 0.1733665192400666, "grad_norm": 1.7721819877624512, "learning_rate": 0.00018941314417078502, "loss": 1.5272, "step": 4841 }, { "epoch": 0.1734023313696349, "grad_norm": 1.349071979522705, "learning_rate": 0.00018940794947791247, "loss": 1.3999, "step": 4842 }, { "epoch": 0.17343814349920317, "grad_norm": 1.555430293083191, "learning_rate": 0.0001894027535821713, "loss": 1.5955, "step": 4843 }, { "epoch": 0.17347395562877146, "grad_norm": 1.8791706562042236, "learning_rate": 0.0001893975564836314, "loss": 1.5432, "step": 4844 }, { "epoch": 0.17350976775833976, "grad_norm": 2.6219892501831055, "learning_rate": 0.00018939235818236268, "loss": 2.0688, "step": 4845 }, { "epoch": 0.17354557988790803, "grad_norm": 2.0889711380004883, "learning_rate": 0.00018938715867843512, "loss": 1.4781, "step": 4846 }, { "epoch": 0.17358139201747633, "grad_norm": 1.7666796445846558, "learning_rate": 0.0001893819579719186, "loss": 1.4148, "step": 4847 }, { "epoch": 0.1736172041470446, "grad_norm": 2.062335968017578, "learning_rate": 0.00018937675606288317, "loss": 1.4671, "step": 4848 }, { "epoch": 0.1736530162766129, "grad_norm": 1.5806844234466553, "learning_rate": 0.00018937155295139878, "loss": 1.7065, "step": 4849 }, { "epoch": 0.17368882840618116, "grad_norm": 1.7525285482406616, "learning_rate": 0.00018936634863753537, "loss": 1.5066, "step": 4850 }, { "epoch": 0.17372464053574946, "grad_norm": 1.3670426607131958, "learning_rate": 0.00018936114312136307, "loss": 1.5515, "step": 4851 }, { "epoch": 0.17376045266531776, "grad_norm": 1.4752345085144043, "learning_rate": 0.0001893559364029518, "loss": 1.4657, "step": 4852 }, { "epoch": 0.17379626479488602, "grad_norm": 1.8495032787322998, "learning_rate": 0.00018935072848237172, "loss": 1.6514, "step": 4853 }, { "epoch": 0.17383207692445432, "grad_norm": 2.1843106746673584, "learning_rate": 0.00018934551935969284, "loss": 1.4489, "step": 4854 }, { "epoch": 0.1738678890540226, "grad_norm": 1.5473827123641968, "learning_rate": 0.00018934030903498518, "loss": 1.5436, "step": 4855 }, { "epoch": 0.1739037011835909, "grad_norm": 1.4357026815414429, "learning_rate": 0.00018933509750831897, "loss": 1.6933, "step": 4856 }, { "epoch": 0.17393951331315916, "grad_norm": 1.5008283853530884, "learning_rate": 0.00018932988477976423, "loss": 1.6496, "step": 4857 }, { "epoch": 0.17397532544272745, "grad_norm": 1.748734474182129, "learning_rate": 0.0001893246708493911, "loss": 1.6295, "step": 4858 }, { "epoch": 0.17401113757229575, "grad_norm": 2.9435081481933594, "learning_rate": 0.0001893194557172698, "loss": 1.3209, "step": 4859 }, { "epoch": 0.17404694970186402, "grad_norm": 1.4894545078277588, "learning_rate": 0.0001893142393834704, "loss": 1.7977, "step": 4860 }, { "epoch": 0.17408276183143231, "grad_norm": 2.0931038856506348, "learning_rate": 0.00018930902184806313, "loss": 1.9863, "step": 4861 }, { "epoch": 0.17411857396100058, "grad_norm": 1.7065221071243286, "learning_rate": 0.00018930380311111815, "loss": 1.4548, "step": 4862 }, { "epoch": 0.17415438609056888, "grad_norm": 1.5173895359039307, "learning_rate": 0.0001892985831727057, "loss": 1.607, "step": 4863 }, { "epoch": 0.17419019822013715, "grad_norm": 1.4496746063232422, "learning_rate": 0.000189293362032896, "loss": 1.4503, "step": 4864 }, { "epoch": 0.17422601034970545, "grad_norm": 1.7697582244873047, "learning_rate": 0.00018928813969175932, "loss": 1.6072, "step": 4865 }, { "epoch": 0.17426182247927374, "grad_norm": 1.6433504819869995, "learning_rate": 0.00018928291614936585, "loss": 1.7286, "step": 4866 }, { "epoch": 0.174297634608842, "grad_norm": 1.8499150276184082, "learning_rate": 0.00018927769140578593, "loss": 1.5328, "step": 4867 }, { "epoch": 0.1743334467384103, "grad_norm": 2.2025258541107178, "learning_rate": 0.00018927246546108985, "loss": 1.755, "step": 4868 }, { "epoch": 0.17436925886797858, "grad_norm": 2.001037120819092, "learning_rate": 0.00018926723831534789, "loss": 1.4364, "step": 4869 }, { "epoch": 0.17440507099754687, "grad_norm": 1.4569237232208252, "learning_rate": 0.00018926200996863038, "loss": 1.8061, "step": 4870 }, { "epoch": 0.17444088312711514, "grad_norm": 1.2852599620819092, "learning_rate": 0.00018925678042100766, "loss": 1.2263, "step": 4871 }, { "epoch": 0.17447669525668344, "grad_norm": 1.772127628326416, "learning_rate": 0.0001892515496725501, "loss": 1.4574, "step": 4872 }, { "epoch": 0.17451250738625174, "grad_norm": 1.3789273500442505, "learning_rate": 0.00018924631772332807, "loss": 1.4344, "step": 4873 }, { "epoch": 0.17454831951582, "grad_norm": 1.595746397972107, "learning_rate": 0.00018924108457341195, "loss": 1.9308, "step": 4874 }, { "epoch": 0.1745841316453883, "grad_norm": 1.9921436309814453, "learning_rate": 0.00018923585022287214, "loss": 1.5553, "step": 4875 }, { "epoch": 0.17461994377495657, "grad_norm": 1.2532308101654053, "learning_rate": 0.00018923061467177908, "loss": 1.5337, "step": 4876 }, { "epoch": 0.17465575590452487, "grad_norm": 1.189430832862854, "learning_rate": 0.0001892253779202032, "loss": 1.4088, "step": 4877 }, { "epoch": 0.17469156803409314, "grad_norm": 2.0476043224334717, "learning_rate": 0.00018922013996821492, "loss": 1.2659, "step": 4878 }, { "epoch": 0.17472738016366143, "grad_norm": 2.3595757484436035, "learning_rate": 0.0001892149008158848, "loss": 1.4595, "step": 4879 }, { "epoch": 0.17476319229322973, "grad_norm": 1.562600016593933, "learning_rate": 0.00018920966046328324, "loss": 1.5422, "step": 4880 }, { "epoch": 0.174799004422798, "grad_norm": 1.5140035152435303, "learning_rate": 0.00018920441891048077, "loss": 1.628, "step": 4881 }, { "epoch": 0.1748348165523663, "grad_norm": 1.220018744468689, "learning_rate": 0.00018919917615754792, "loss": 1.4796, "step": 4882 }, { "epoch": 0.17487062868193456, "grad_norm": 1.8878225088119507, "learning_rate": 0.00018919393220455518, "loss": 1.2749, "step": 4883 }, { "epoch": 0.17490644081150286, "grad_norm": 1.5586538314819336, "learning_rate": 0.00018918868705157318, "loss": 1.4179, "step": 4884 }, { "epoch": 0.17494225294107113, "grad_norm": 1.4380314350128174, "learning_rate": 0.00018918344069867243, "loss": 1.5866, "step": 4885 }, { "epoch": 0.17497806507063943, "grad_norm": 3.5423998832702637, "learning_rate": 0.00018917819314592351, "loss": 1.9557, "step": 4886 }, { "epoch": 0.17501387720020772, "grad_norm": 1.9124844074249268, "learning_rate": 0.00018917294439339705, "loss": 1.517, "step": 4887 }, { "epoch": 0.175049689329776, "grad_norm": 1.3516485691070557, "learning_rate": 0.0001891676944411636, "loss": 1.5067, "step": 4888 }, { "epoch": 0.1750855014593443, "grad_norm": 1.858694076538086, "learning_rate": 0.00018916244328929388, "loss": 1.4675, "step": 4889 }, { "epoch": 0.17512131358891256, "grad_norm": 1.8786976337432861, "learning_rate": 0.00018915719093785848, "loss": 1.5355, "step": 4890 }, { "epoch": 0.17515712571848085, "grad_norm": 1.3482768535614014, "learning_rate": 0.00018915193738692812, "loss": 1.6203, "step": 4891 }, { "epoch": 0.17519293784804912, "grad_norm": 1.3710525035858154, "learning_rate": 0.00018914668263657342, "loss": 1.3993, "step": 4892 }, { "epoch": 0.17522874997761742, "grad_norm": 1.5821046829223633, "learning_rate": 0.00018914142668686505, "loss": 1.5148, "step": 4893 }, { "epoch": 0.17526456210718572, "grad_norm": 2.2228548526763916, "learning_rate": 0.0001891361695378738, "loss": 1.9352, "step": 4894 }, { "epoch": 0.175300374236754, "grad_norm": 3.116131544113159, "learning_rate": 0.00018913091118967037, "loss": 1.8747, "step": 4895 }, { "epoch": 0.17533618636632228, "grad_norm": 1.6172984838485718, "learning_rate": 0.00018912565164232552, "loss": 1.3994, "step": 4896 }, { "epoch": 0.17537199849589055, "grad_norm": 1.4550762176513672, "learning_rate": 0.00018912039089590997, "loss": 1.4414, "step": 4897 }, { "epoch": 0.17540781062545885, "grad_norm": 2.1344704627990723, "learning_rate": 0.00018911512895049452, "loss": 1.6229, "step": 4898 }, { "epoch": 0.17544362275502712, "grad_norm": 1.5842924118041992, "learning_rate": 0.00018910986580614997, "loss": 1.325, "step": 4899 }, { "epoch": 0.17547943488459541, "grad_norm": 1.6697801351547241, "learning_rate": 0.00018910460146294707, "loss": 1.5925, "step": 4900 }, { "epoch": 0.1755152470141637, "grad_norm": 1.509250521659851, "learning_rate": 0.00018909933592095674, "loss": 1.4572, "step": 4901 }, { "epoch": 0.17555105914373198, "grad_norm": 1.783841609954834, "learning_rate": 0.0001890940691802498, "loss": 1.6234, "step": 4902 }, { "epoch": 0.17558687127330028, "grad_norm": 1.7891788482666016, "learning_rate": 0.00018908880124089702, "loss": 1.458, "step": 4903 }, { "epoch": 0.17562268340286855, "grad_norm": 2.8010456562042236, "learning_rate": 0.0001890835321029694, "loss": 1.5074, "step": 4904 }, { "epoch": 0.17565849553243684, "grad_norm": 1.094394564628601, "learning_rate": 0.00018907826176653772, "loss": 1.3326, "step": 4905 }, { "epoch": 0.1756943076620051, "grad_norm": 1.3641574382781982, "learning_rate": 0.00018907299023167293, "loss": 1.7497, "step": 4906 }, { "epoch": 0.1757301197915734, "grad_norm": 1.4977530241012573, "learning_rate": 0.00018906771749844595, "loss": 1.5956, "step": 4907 }, { "epoch": 0.17576593192114168, "grad_norm": 1.44773268699646, "learning_rate": 0.00018906244356692775, "loss": 1.7386, "step": 4908 }, { "epoch": 0.17580174405070997, "grad_norm": 1.46619713306427, "learning_rate": 0.00018905716843718926, "loss": 1.6829, "step": 4909 }, { "epoch": 0.17583755618027827, "grad_norm": 4.56627893447876, "learning_rate": 0.00018905189210930142, "loss": 1.5221, "step": 4910 }, { "epoch": 0.17587336830984654, "grad_norm": 1.9148731231689453, "learning_rate": 0.00018904661458333524, "loss": 1.5686, "step": 4911 }, { "epoch": 0.17590918043941484, "grad_norm": 1.6884065866470337, "learning_rate": 0.00018904133585936173, "loss": 1.531, "step": 4912 }, { "epoch": 0.1759449925689831, "grad_norm": 1.323333740234375, "learning_rate": 0.0001890360559374519, "loss": 1.7735, "step": 4913 }, { "epoch": 0.1759808046985514, "grad_norm": 2.1087372303009033, "learning_rate": 0.00018903077481767676, "loss": 1.305, "step": 4914 }, { "epoch": 0.17601661682811967, "grad_norm": 2.0046706199645996, "learning_rate": 0.00018902549250010743, "loss": 1.5707, "step": 4915 }, { "epoch": 0.17605242895768797, "grad_norm": 2.4085426330566406, "learning_rate": 0.0001890202089848149, "loss": 1.6196, "step": 4916 }, { "epoch": 0.17608824108725626, "grad_norm": 1.8258897066116333, "learning_rate": 0.00018901492427187032, "loss": 1.5813, "step": 4917 }, { "epoch": 0.17612405321682453, "grad_norm": 1.4927031993865967, "learning_rate": 0.0001890096383613447, "loss": 1.6213, "step": 4918 }, { "epoch": 0.17615986534639283, "grad_norm": 1.136582612991333, "learning_rate": 0.00018900435125330923, "loss": 1.4248, "step": 4919 }, { "epoch": 0.1761956774759611, "grad_norm": 1.3010669946670532, "learning_rate": 0.00018899906294783504, "loss": 1.6543, "step": 4920 }, { "epoch": 0.1762314896055294, "grad_norm": 1.9157588481903076, "learning_rate": 0.00018899377344499328, "loss": 1.5228, "step": 4921 }, { "epoch": 0.17626730173509766, "grad_norm": 2.504683494567871, "learning_rate": 0.00018898848274485506, "loss": 2.0403, "step": 4922 }, { "epoch": 0.17630311386466596, "grad_norm": 1.409203052520752, "learning_rate": 0.00018898319084749158, "loss": 1.2702, "step": 4923 }, { "epoch": 0.17633892599423426, "grad_norm": 2.0909388065338135, "learning_rate": 0.00018897789775297404, "loss": 1.8806, "step": 4924 }, { "epoch": 0.17637473812380253, "grad_norm": 1.3817533254623413, "learning_rate": 0.0001889726034613737, "loss": 1.5544, "step": 4925 }, { "epoch": 0.17641055025337082, "grad_norm": 1.9548993110656738, "learning_rate": 0.00018896730797276175, "loss": 1.6944, "step": 4926 }, { "epoch": 0.1764463623829391, "grad_norm": 1.3529974222183228, "learning_rate": 0.00018896201128720938, "loss": 1.471, "step": 4927 }, { "epoch": 0.1764821745125074, "grad_norm": 1.5662931203842163, "learning_rate": 0.00018895671340478796, "loss": 1.5855, "step": 4928 }, { "epoch": 0.17651798664207566, "grad_norm": 1.9040552377700806, "learning_rate": 0.00018895141432556867, "loss": 1.4631, "step": 4929 }, { "epoch": 0.17655379877164395, "grad_norm": 2.3355555534362793, "learning_rate": 0.00018894611404962283, "loss": 1.6409, "step": 4930 }, { "epoch": 0.17658961090121225, "grad_norm": 1.3861925601959229, "learning_rate": 0.0001889408125770218, "loss": 1.5785, "step": 4931 }, { "epoch": 0.17662542303078052, "grad_norm": 1.3609862327575684, "learning_rate": 0.00018893550990783684, "loss": 1.4329, "step": 4932 }, { "epoch": 0.17666123516034882, "grad_norm": 1.739033818244934, "learning_rate": 0.00018893020604213932, "loss": 1.6007, "step": 4933 }, { "epoch": 0.1766970472899171, "grad_norm": 2.2016334533691406, "learning_rate": 0.00018892490098000055, "loss": 1.4416, "step": 4934 }, { "epoch": 0.17673285941948538, "grad_norm": 1.7724460363388062, "learning_rate": 0.00018891959472149198, "loss": 1.4571, "step": 4935 }, { "epoch": 0.17676867154905365, "grad_norm": 2.670675754547119, "learning_rate": 0.00018891428726668495, "loss": 1.5139, "step": 4936 }, { "epoch": 0.17680448367862195, "grad_norm": 1.693932294845581, "learning_rate": 0.00018890897861565086, "loss": 2.0678, "step": 4937 }, { "epoch": 0.17684029580819025, "grad_norm": 1.6531952619552612, "learning_rate": 0.00018890366876846119, "loss": 1.7776, "step": 4938 }, { "epoch": 0.17687610793775851, "grad_norm": 2.104870080947876, "learning_rate": 0.00018889835772518731, "loss": 1.5346, "step": 4939 }, { "epoch": 0.1769119200673268, "grad_norm": 1.7648040056228638, "learning_rate": 0.00018889304548590067, "loss": 1.8308, "step": 4940 }, { "epoch": 0.17694773219689508, "grad_norm": 1.3178796768188477, "learning_rate": 0.00018888773205067282, "loss": 1.7215, "step": 4941 }, { "epoch": 0.17698354432646338, "grad_norm": 1.2029863595962524, "learning_rate": 0.00018888241741957514, "loss": 1.5563, "step": 4942 }, { "epoch": 0.17701935645603165, "grad_norm": 2.1883883476257324, "learning_rate": 0.00018887710159267923, "loss": 1.7515, "step": 4943 }, { "epoch": 0.17705516858559994, "grad_norm": 1.2338509559631348, "learning_rate": 0.00018887178457005653, "loss": 1.4304, "step": 4944 }, { "epoch": 0.17709098071516824, "grad_norm": 1.7541881799697876, "learning_rate": 0.00018886646635177864, "loss": 1.7429, "step": 4945 }, { "epoch": 0.1771267928447365, "grad_norm": 1.4852969646453857, "learning_rate": 0.00018886114693791704, "loss": 1.4427, "step": 4946 }, { "epoch": 0.1771626049743048, "grad_norm": 1.5804656744003296, "learning_rate": 0.00018885582632854333, "loss": 1.431, "step": 4947 }, { "epoch": 0.17719841710387307, "grad_norm": 1.3191192150115967, "learning_rate": 0.00018885050452372912, "loss": 1.5655, "step": 4948 }, { "epoch": 0.17723422923344137, "grad_norm": 2.596315383911133, "learning_rate": 0.00018884518152354596, "loss": 1.496, "step": 4949 }, { "epoch": 0.17727004136300964, "grad_norm": 1.8313298225402832, "learning_rate": 0.00018883985732806547, "loss": 1.5148, "step": 4950 }, { "epoch": 0.17730585349257794, "grad_norm": 1.5022848844528198, "learning_rate": 0.00018883453193735932, "loss": 1.4678, "step": 4951 }, { "epoch": 0.17734166562214623, "grad_norm": 1.8129926919937134, "learning_rate": 0.00018882920535149913, "loss": 1.5432, "step": 4952 }, { "epoch": 0.1773774777517145, "grad_norm": 1.5845561027526855, "learning_rate": 0.00018882387757055655, "loss": 1.9524, "step": 4953 }, { "epoch": 0.1774132898812828, "grad_norm": 1.0808457136154175, "learning_rate": 0.00018881854859460328, "loss": 1.643, "step": 4954 }, { "epoch": 0.17744910201085107, "grad_norm": 1.7996083498001099, "learning_rate": 0.00018881321842371103, "loss": 1.6271, "step": 4955 }, { "epoch": 0.17748491414041936, "grad_norm": 1.7075645923614502, "learning_rate": 0.00018880788705795144, "loss": 1.8461, "step": 4956 }, { "epoch": 0.17752072626998763, "grad_norm": 1.3339205980300903, "learning_rate": 0.00018880255449739634, "loss": 1.5211, "step": 4957 }, { "epoch": 0.17755653839955593, "grad_norm": 1.3496900796890259, "learning_rate": 0.00018879722074211736, "loss": 1.5706, "step": 4958 }, { "epoch": 0.17759235052912423, "grad_norm": 1.2548480033874512, "learning_rate": 0.00018879188579218635, "loss": 1.5067, "step": 4959 }, { "epoch": 0.1776281626586925, "grad_norm": 1.7974036931991577, "learning_rate": 0.000188786549647675, "loss": 1.38, "step": 4960 }, { "epoch": 0.1776639747882608, "grad_norm": 1.8285739421844482, "learning_rate": 0.0001887812123086552, "loss": 1.8508, "step": 4961 }, { "epoch": 0.17769978691782906, "grad_norm": 1.8094350099563599, "learning_rate": 0.0001887758737751987, "loss": 1.4213, "step": 4962 }, { "epoch": 0.17773559904739736, "grad_norm": 1.3000043630599976, "learning_rate": 0.00018877053404737734, "loss": 1.6764, "step": 4963 }, { "epoch": 0.17777141117696563, "grad_norm": 1.428836703300476, "learning_rate": 0.00018876519312526293, "loss": 1.6104, "step": 4964 }, { "epoch": 0.17780722330653392, "grad_norm": 1.7616479396820068, "learning_rate": 0.00018875985100892738, "loss": 1.2714, "step": 4965 }, { "epoch": 0.17784303543610222, "grad_norm": 1.2152507305145264, "learning_rate": 0.0001887545076984425, "loss": 1.6792, "step": 4966 }, { "epoch": 0.1778788475656705, "grad_norm": 1.5839030742645264, "learning_rate": 0.0001887491631938802, "loss": 1.2338, "step": 4967 }, { "epoch": 0.17791465969523879, "grad_norm": 3.108227014541626, "learning_rate": 0.0001887438174953124, "loss": 1.7161, "step": 4968 }, { "epoch": 0.17795047182480705, "grad_norm": 1.628882884979248, "learning_rate": 0.000188738470602811, "loss": 1.9082, "step": 4969 }, { "epoch": 0.17798628395437535, "grad_norm": 1.6627739667892456, "learning_rate": 0.00018873312251644793, "loss": 1.5598, "step": 4970 }, { "epoch": 0.17802209608394362, "grad_norm": 1.4799766540527344, "learning_rate": 0.00018872777323629514, "loss": 1.3478, "step": 4971 }, { "epoch": 0.17805790821351192, "grad_norm": 2.2663679122924805, "learning_rate": 0.00018872242276242465, "loss": 1.8249, "step": 4972 }, { "epoch": 0.17809372034308021, "grad_norm": 1.7324576377868652, "learning_rate": 0.0001887170710949084, "loss": 1.9787, "step": 4973 }, { "epoch": 0.17812953247264848, "grad_norm": 1.4873781204223633, "learning_rate": 0.00018871171823381836, "loss": 1.9714, "step": 4974 }, { "epoch": 0.17816534460221678, "grad_norm": 1.8465592861175537, "learning_rate": 0.00018870636417922662, "loss": 1.4609, "step": 4975 }, { "epoch": 0.17820115673178505, "grad_norm": 2.0763323307037354, "learning_rate": 0.00018870100893120516, "loss": 1.8321, "step": 4976 }, { "epoch": 0.17823696886135335, "grad_norm": 1.5704597234725952, "learning_rate": 0.00018869565248982607, "loss": 1.4162, "step": 4977 }, { "epoch": 0.17827278099092161, "grad_norm": 1.4963994026184082, "learning_rate": 0.00018869029485516135, "loss": 1.2262, "step": 4978 }, { "epoch": 0.1783085931204899, "grad_norm": 1.768388271331787, "learning_rate": 0.0001886849360272831, "loss": 1.5942, "step": 4979 }, { "epoch": 0.1783444052500582, "grad_norm": 1.202764868736267, "learning_rate": 0.00018867957600626344, "loss": 1.7769, "step": 4980 }, { "epoch": 0.17838021737962648, "grad_norm": 1.4093730449676514, "learning_rate": 0.0001886742147921745, "loss": 1.6073, "step": 4981 }, { "epoch": 0.17841602950919477, "grad_norm": 1.5123848915100098, "learning_rate": 0.0001886688523850884, "loss": 1.5704, "step": 4982 }, { "epoch": 0.17845184163876304, "grad_norm": 1.5131558179855347, "learning_rate": 0.0001886634887850772, "loss": 1.7871, "step": 4983 }, { "epoch": 0.17848765376833134, "grad_norm": 1.548387050628662, "learning_rate": 0.00018865812399221317, "loss": 1.6158, "step": 4984 }, { "epoch": 0.1785234658978996, "grad_norm": 1.310096263885498, "learning_rate": 0.00018865275800656844, "loss": 1.6574, "step": 4985 }, { "epoch": 0.1785592780274679, "grad_norm": 1.429352045059204, "learning_rate": 0.00018864739082821518, "loss": 1.6101, "step": 4986 }, { "epoch": 0.1785950901570362, "grad_norm": 1.6432214975357056, "learning_rate": 0.00018864202245722566, "loss": 1.7259, "step": 4987 }, { "epoch": 0.17863090228660447, "grad_norm": 1.8350661993026733, "learning_rate": 0.00018863665289367204, "loss": 1.6486, "step": 4988 }, { "epoch": 0.17866671441617277, "grad_norm": 1.4883739948272705, "learning_rate": 0.0001886312821376266, "loss": 1.5008, "step": 4989 }, { "epoch": 0.17870252654574104, "grad_norm": 1.758785605430603, "learning_rate": 0.00018862591018916155, "loss": 1.5733, "step": 4990 }, { "epoch": 0.17873833867530933, "grad_norm": 1.595152497291565, "learning_rate": 0.00018862053704834925, "loss": 1.4687, "step": 4991 }, { "epoch": 0.1787741508048776, "grad_norm": 2.3240230083465576, "learning_rate": 0.00018861516271526191, "loss": 1.4684, "step": 4992 }, { "epoch": 0.1788099629344459, "grad_norm": 1.5655384063720703, "learning_rate": 0.00018860978718997185, "loss": 1.8508, "step": 4993 }, { "epoch": 0.1788457750640142, "grad_norm": 1.9290118217468262, "learning_rate": 0.00018860441047255144, "loss": 1.6378, "step": 4994 }, { "epoch": 0.17888158719358246, "grad_norm": 1.8785892724990845, "learning_rate": 0.00018859903256307297, "loss": 1.8559, "step": 4995 }, { "epoch": 0.17891739932315076, "grad_norm": 1.6702314615249634, "learning_rate": 0.00018859365346160877, "loss": 1.6963, "step": 4996 }, { "epoch": 0.17895321145271903, "grad_norm": 2.097177743911743, "learning_rate": 0.00018858827316823126, "loss": 1.4722, "step": 4997 }, { "epoch": 0.17898902358228733, "grad_norm": 1.577393889427185, "learning_rate": 0.0001885828916830128, "loss": 1.5131, "step": 4998 }, { "epoch": 0.1790248357118556, "grad_norm": 2.048555374145508, "learning_rate": 0.00018857750900602583, "loss": 1.8445, "step": 4999 }, { "epoch": 0.1790606478414239, "grad_norm": 1.83433997631073, "learning_rate": 0.00018857212513734268, "loss": 1.348, "step": 5000 }, { "epoch": 0.17909645997099216, "grad_norm": 1.6511319875717163, "learning_rate": 0.00018856674007703585, "loss": 1.7947, "step": 5001 }, { "epoch": 0.17913227210056046, "grad_norm": 1.9509414434432983, "learning_rate": 0.0001885613538251778, "loss": 1.6304, "step": 5002 }, { "epoch": 0.17916808423012875, "grad_norm": 1.4391506910324097, "learning_rate": 0.00018855596638184095, "loss": 1.5117, "step": 5003 }, { "epoch": 0.17920389635969702, "grad_norm": 1.9456406831741333, "learning_rate": 0.0001885505777470978, "loss": 1.6562, "step": 5004 }, { "epoch": 0.17923970848926532, "grad_norm": 1.2658982276916504, "learning_rate": 0.00018854518792102084, "loss": 1.4669, "step": 5005 }, { "epoch": 0.1792755206188336, "grad_norm": 1.4607932567596436, "learning_rate": 0.00018853979690368263, "loss": 1.8091, "step": 5006 }, { "epoch": 0.17931133274840189, "grad_norm": 1.4325487613677979, "learning_rate": 0.0001885344046951556, "loss": 1.6542, "step": 5007 }, { "epoch": 0.17934714487797015, "grad_norm": 1.477703332901001, "learning_rate": 0.00018852901129551243, "loss": 1.5072, "step": 5008 }, { "epoch": 0.17938295700753845, "grad_norm": 2.172150135040283, "learning_rate": 0.00018852361670482556, "loss": 1.7455, "step": 5009 }, { "epoch": 0.17941876913710675, "grad_norm": 1.7485425472259521, "learning_rate": 0.00018851822092316763, "loss": 1.8077, "step": 5010 }, { "epoch": 0.17945458126667502, "grad_norm": 1.443163514137268, "learning_rate": 0.00018851282395061122, "loss": 1.3942, "step": 5011 }, { "epoch": 0.17949039339624331, "grad_norm": 1.8629781007766724, "learning_rate": 0.00018850742578722894, "loss": 1.6342, "step": 5012 }, { "epoch": 0.17952620552581158, "grad_norm": 2.062121629714966, "learning_rate": 0.00018850202643309338, "loss": 1.6435, "step": 5013 }, { "epoch": 0.17956201765537988, "grad_norm": 1.654205083847046, "learning_rate": 0.00018849662588827723, "loss": 1.4998, "step": 5014 }, { "epoch": 0.17959782978494815, "grad_norm": 2.017118453979492, "learning_rate": 0.00018849122415285315, "loss": 1.6594, "step": 5015 }, { "epoch": 0.17963364191451645, "grad_norm": 1.514277696609497, "learning_rate": 0.00018848582122689376, "loss": 1.5694, "step": 5016 }, { "epoch": 0.17966945404408474, "grad_norm": 1.299932599067688, "learning_rate": 0.0001884804171104718, "loss": 1.6352, "step": 5017 }, { "epoch": 0.179705266173653, "grad_norm": 2.4143078327178955, "learning_rate": 0.00018847501180365995, "loss": 1.9637, "step": 5018 }, { "epoch": 0.1797410783032213, "grad_norm": 1.4530065059661865, "learning_rate": 0.000188469605306531, "loss": 1.6145, "step": 5019 }, { "epoch": 0.17977689043278958, "grad_norm": 1.9624786376953125, "learning_rate": 0.00018846419761915753, "loss": 1.3612, "step": 5020 }, { "epoch": 0.17981270256235787, "grad_norm": 1.5147650241851807, "learning_rate": 0.00018845878874161249, "loss": 1.127, "step": 5021 }, { "epoch": 0.17984851469192614, "grad_norm": 1.562705636024475, "learning_rate": 0.00018845337867396848, "loss": 1.6414, "step": 5022 }, { "epoch": 0.17988432682149444, "grad_norm": 1.8817349672317505, "learning_rate": 0.0001884479674162984, "loss": 1.8362, "step": 5023 }, { "epoch": 0.17992013895106274, "grad_norm": 1.2586948871612549, "learning_rate": 0.00018844255496867497, "loss": 1.4049, "step": 5024 }, { "epoch": 0.179955951080631, "grad_norm": 1.6511366367340088, "learning_rate": 0.00018843714133117106, "loss": 1.6536, "step": 5025 }, { "epoch": 0.1799917632101993, "grad_norm": 1.656224012374878, "learning_rate": 0.0001884317265038595, "loss": 1.6308, "step": 5026 }, { "epoch": 0.18002757533976757, "grad_norm": 2.134478807449341, "learning_rate": 0.00018842631048681313, "loss": 1.3212, "step": 5027 }, { "epoch": 0.18006338746933587, "grad_norm": 1.6021968126296997, "learning_rate": 0.00018842089328010482, "loss": 1.7667, "step": 5028 }, { "epoch": 0.18009919959890414, "grad_norm": 2.7165796756744385, "learning_rate": 0.00018841547488380745, "loss": 1.7146, "step": 5029 }, { "epoch": 0.18013501172847243, "grad_norm": 1.9333155155181885, "learning_rate": 0.00018841005529799388, "loss": 1.6024, "step": 5030 }, { "epoch": 0.18017082385804073, "grad_norm": 1.365800380706787, "learning_rate": 0.00018840463452273707, "loss": 1.4516, "step": 5031 }, { "epoch": 0.180206635987609, "grad_norm": 1.4623908996582031, "learning_rate": 0.00018839921255810996, "loss": 1.6984, "step": 5032 }, { "epoch": 0.1802424481171773, "grad_norm": 1.6998884677886963, "learning_rate": 0.00018839378940418544, "loss": 1.4623, "step": 5033 }, { "epoch": 0.18027826024674556, "grad_norm": 2.1139144897460938, "learning_rate": 0.00018838836506103652, "loss": 1.5678, "step": 5034 }, { "epoch": 0.18031407237631386, "grad_norm": 2.4886868000030518, "learning_rate": 0.00018838293952873616, "loss": 1.8086, "step": 5035 }, { "epoch": 0.18034988450588213, "grad_norm": 1.4761202335357666, "learning_rate": 0.0001883775128073573, "loss": 1.8566, "step": 5036 }, { "epoch": 0.18038569663545043, "grad_norm": 1.5243207216262817, "learning_rate": 0.0001883720848969731, "loss": 1.5345, "step": 5037 }, { "epoch": 0.18042150876501872, "grad_norm": 2.0490150451660156, "learning_rate": 0.00018836665579765642, "loss": 1.3139, "step": 5038 }, { "epoch": 0.180457320894587, "grad_norm": 1.9801737070083618, "learning_rate": 0.0001883612255094804, "loss": 1.6783, "step": 5039 }, { "epoch": 0.1804931330241553, "grad_norm": 1.588937520980835, "learning_rate": 0.00018835579403251806, "loss": 1.4642, "step": 5040 }, { "epoch": 0.18052894515372356, "grad_norm": 1.7436013221740723, "learning_rate": 0.00018835036136684248, "loss": 1.7724, "step": 5041 }, { "epoch": 0.18056475728329185, "grad_norm": 1.4341626167297363, "learning_rate": 0.00018834492751252678, "loss": 1.5823, "step": 5042 }, { "epoch": 0.18060056941286012, "grad_norm": 1.5773844718933105, "learning_rate": 0.000188339492469644, "loss": 1.5664, "step": 5043 }, { "epoch": 0.18063638154242842, "grad_norm": 2.491431474685669, "learning_rate": 0.0001883340562382673, "loss": 2.0062, "step": 5044 }, { "epoch": 0.18067219367199672, "grad_norm": 1.8883206844329834, "learning_rate": 0.00018832861881846983, "loss": 1.6656, "step": 5045 }, { "epoch": 0.18070800580156499, "grad_norm": 1.7385650873184204, "learning_rate": 0.00018832318021032472, "loss": 1.637, "step": 5046 }, { "epoch": 0.18074381793113328, "grad_norm": 1.5121614933013916, "learning_rate": 0.00018831774041390517, "loss": 1.1038, "step": 5047 }, { "epoch": 0.18077963006070155, "grad_norm": 1.1999789476394653, "learning_rate": 0.00018831229942928434, "loss": 1.5409, "step": 5048 }, { "epoch": 0.18081544219026985, "grad_norm": 1.249335527420044, "learning_rate": 0.0001883068572565354, "loss": 1.3306, "step": 5049 }, { "epoch": 0.18085125431983812, "grad_norm": 1.4928827285766602, "learning_rate": 0.00018830141389573166, "loss": 1.699, "step": 5050 }, { "epoch": 0.18088706644940641, "grad_norm": 1.2425192594528198, "learning_rate": 0.00018829596934694624, "loss": 1.6774, "step": 5051 }, { "epoch": 0.1809228785789747, "grad_norm": 1.6975491046905518, "learning_rate": 0.0001882905236102525, "loss": 1.6176, "step": 5052 }, { "epoch": 0.18095869070854298, "grad_norm": 1.3166990280151367, "learning_rate": 0.0001882850766857236, "loss": 1.3818, "step": 5053 }, { "epoch": 0.18099450283811128, "grad_norm": 1.856614351272583, "learning_rate": 0.0001882796285734329, "loss": 1.5886, "step": 5054 }, { "epoch": 0.18103031496767955, "grad_norm": 1.364363670349121, "learning_rate": 0.0001882741792734537, "loss": 1.4846, "step": 5055 }, { "epoch": 0.18106612709724784, "grad_norm": 1.5802984237670898, "learning_rate": 0.00018826872878585925, "loss": 1.5028, "step": 5056 }, { "epoch": 0.1811019392268161, "grad_norm": 1.4129685163497925, "learning_rate": 0.0001882632771107229, "loss": 1.6633, "step": 5057 }, { "epoch": 0.1811377513563844, "grad_norm": 1.0658329725265503, "learning_rate": 0.00018825782424811802, "loss": 1.4006, "step": 5058 }, { "epoch": 0.1811735634859527, "grad_norm": 1.5389435291290283, "learning_rate": 0.00018825237019811796, "loss": 1.7775, "step": 5059 }, { "epoch": 0.18120937561552097, "grad_norm": 1.2066320180892944, "learning_rate": 0.0001882469149607961, "loss": 1.5404, "step": 5060 }, { "epoch": 0.18124518774508927, "grad_norm": 1.2888737916946411, "learning_rate": 0.00018824145853622582, "loss": 1.6816, "step": 5061 }, { "epoch": 0.18128099987465754, "grad_norm": 1.9192713499069214, "learning_rate": 0.00018823600092448054, "loss": 1.6496, "step": 5062 }, { "epoch": 0.18131681200422584, "grad_norm": 1.499048113822937, "learning_rate": 0.0001882305421256337, "loss": 1.4216, "step": 5063 }, { "epoch": 0.1813526241337941, "grad_norm": 1.7019413709640503, "learning_rate": 0.0001882250821397587, "loss": 1.5245, "step": 5064 }, { "epoch": 0.1813884362633624, "grad_norm": 1.9641376733779907, "learning_rate": 0.00018821962096692905, "loss": 1.622, "step": 5065 }, { "epoch": 0.1814242483929307, "grad_norm": 2.3913800716400146, "learning_rate": 0.00018821415860721818, "loss": 1.2029, "step": 5066 }, { "epoch": 0.18146006052249897, "grad_norm": 1.3694190979003906, "learning_rate": 0.0001882086950606996, "loss": 1.6648, "step": 5067 }, { "epoch": 0.18149587265206726, "grad_norm": 1.5379868745803833, "learning_rate": 0.0001882032303274468, "loss": 1.4357, "step": 5068 }, { "epoch": 0.18153168478163553, "grad_norm": 1.7823456525802612, "learning_rate": 0.0001881977644075333, "loss": 1.4252, "step": 5069 }, { "epoch": 0.18156749691120383, "grad_norm": 1.8627572059631348, "learning_rate": 0.00018819229730103267, "loss": 1.6875, "step": 5070 }, { "epoch": 0.1816033090407721, "grad_norm": 1.8082793951034546, "learning_rate": 0.00018818682900801842, "loss": 1.451, "step": 5071 }, { "epoch": 0.1816391211703404, "grad_norm": 1.3875577449798584, "learning_rate": 0.00018818135952856414, "loss": 1.6158, "step": 5072 }, { "epoch": 0.1816749332999087, "grad_norm": 1.2505030632019043, "learning_rate": 0.00018817588886274345, "loss": 1.7009, "step": 5073 }, { "epoch": 0.18171074542947696, "grad_norm": 1.660299301147461, "learning_rate": 0.00018817041701062987, "loss": 1.666, "step": 5074 }, { "epoch": 0.18174655755904526, "grad_norm": 1.854568600654602, "learning_rate": 0.00018816494397229708, "loss": 1.6123, "step": 5075 }, { "epoch": 0.18178236968861353, "grad_norm": 2.6197986602783203, "learning_rate": 0.0001881594697478187, "loss": 1.5637, "step": 5076 }, { "epoch": 0.18181818181818182, "grad_norm": 2.150740623474121, "learning_rate": 0.00018815399433726835, "loss": 1.6449, "step": 5077 }, { "epoch": 0.1818539939477501, "grad_norm": 3.9189467430114746, "learning_rate": 0.00018814851774071972, "loss": 1.6728, "step": 5078 }, { "epoch": 0.1818898060773184, "grad_norm": 1.6135289669036865, "learning_rate": 0.00018814303995824653, "loss": 1.6037, "step": 5079 }, { "epoch": 0.18192561820688669, "grad_norm": 1.3914783000946045, "learning_rate": 0.00018813756098992238, "loss": 1.5109, "step": 5080 }, { "epoch": 0.18196143033645495, "grad_norm": 1.7650524377822876, "learning_rate": 0.00018813208083582106, "loss": 1.44, "step": 5081 }, { "epoch": 0.18199724246602325, "grad_norm": 1.4060581922531128, "learning_rate": 0.00018812659949601627, "loss": 1.5721, "step": 5082 }, { "epoch": 0.18203305459559152, "grad_norm": 1.284258246421814, "learning_rate": 0.00018812111697058174, "loss": 1.5441, "step": 5083 }, { "epoch": 0.18206886672515982, "grad_norm": 2.5756237506866455, "learning_rate": 0.0001881156332595913, "loss": 1.463, "step": 5084 }, { "epoch": 0.18210467885472809, "grad_norm": 1.4949012994766235, "learning_rate": 0.00018811014836311865, "loss": 1.4694, "step": 5085 }, { "epoch": 0.18214049098429638, "grad_norm": 1.3897700309753418, "learning_rate": 0.00018810466228123758, "loss": 1.5629, "step": 5086 }, { "epoch": 0.18217630311386468, "grad_norm": 1.605708360671997, "learning_rate": 0.00018809917501402197, "loss": 1.4193, "step": 5087 }, { "epoch": 0.18221211524343295, "grad_norm": 1.3950620889663696, "learning_rate": 0.00018809368656154556, "loss": 1.4448, "step": 5088 }, { "epoch": 0.18224792737300125, "grad_norm": 1.950050950050354, "learning_rate": 0.00018808819692388225, "loss": 1.4997, "step": 5089 }, { "epoch": 0.18228373950256951, "grad_norm": 1.9414753913879395, "learning_rate": 0.00018808270610110584, "loss": 1.3708, "step": 5090 }, { "epoch": 0.1823195516321378, "grad_norm": 1.3951416015625, "learning_rate": 0.0001880772140932903, "loss": 1.6992, "step": 5091 }, { "epoch": 0.18235536376170608, "grad_norm": 1.6158993244171143, "learning_rate": 0.00018807172090050945, "loss": 1.6278, "step": 5092 }, { "epoch": 0.18239117589127438, "grad_norm": 1.6947451829910278, "learning_rate": 0.00018806622652283713, "loss": 1.5909, "step": 5093 }, { "epoch": 0.18242698802084267, "grad_norm": 1.4441710710525513, "learning_rate": 0.0001880607309603474, "loss": 1.4248, "step": 5094 }, { "epoch": 0.18246280015041094, "grad_norm": 1.387609839439392, "learning_rate": 0.00018805523421311411, "loss": 1.6221, "step": 5095 }, { "epoch": 0.18249861227997924, "grad_norm": 1.3998216390609741, "learning_rate": 0.0001880497362812112, "loss": 1.7985, "step": 5096 }, { "epoch": 0.1825344244095475, "grad_norm": 2.511861562728882, "learning_rate": 0.00018804423716471268, "loss": 1.4662, "step": 5097 }, { "epoch": 0.1825702365391158, "grad_norm": 1.66793954372406, "learning_rate": 0.00018803873686369253, "loss": 1.3022, "step": 5098 }, { "epoch": 0.18260604866868407, "grad_norm": 1.4244165420532227, "learning_rate": 0.00018803323537822472, "loss": 1.5109, "step": 5099 }, { "epoch": 0.18264186079825237, "grad_norm": 1.2689194679260254, "learning_rate": 0.00018802773270838329, "loss": 1.4388, "step": 5100 }, { "epoch": 0.18267767292782064, "grad_norm": 1.5355168581008911, "learning_rate": 0.0001880222288542423, "loss": 1.3934, "step": 5101 }, { "epoch": 0.18271348505738894, "grad_norm": 1.695492148399353, "learning_rate": 0.0001880167238158757, "loss": 1.6925, "step": 5102 }, { "epoch": 0.18274929718695723, "grad_norm": 1.5693861246109009, "learning_rate": 0.00018801121759335764, "loss": 1.6833, "step": 5103 }, { "epoch": 0.1827851093165255, "grad_norm": 1.766347050666809, "learning_rate": 0.0001880057101867622, "loss": 1.4459, "step": 5104 }, { "epoch": 0.1828209214460938, "grad_norm": 1.3743687868118286, "learning_rate": 0.00018800020159616342, "loss": 1.7515, "step": 5105 }, { "epoch": 0.18285673357566207, "grad_norm": 1.5413109064102173, "learning_rate": 0.00018799469182163544, "loss": 1.5712, "step": 5106 }, { "epoch": 0.18289254570523036, "grad_norm": 2.0827620029449463, "learning_rate": 0.00018798918086325236, "loss": 1.8366, "step": 5107 }, { "epoch": 0.18292835783479863, "grad_norm": 1.954444408416748, "learning_rate": 0.0001879836687210884, "loss": 1.8004, "step": 5108 }, { "epoch": 0.18296416996436693, "grad_norm": 1.6314371824264526, "learning_rate": 0.00018797815539521763, "loss": 1.6223, "step": 5109 }, { "epoch": 0.18299998209393523, "grad_norm": 1.643937349319458, "learning_rate": 0.00018797264088571427, "loss": 1.9055, "step": 5110 }, { "epoch": 0.1830357942235035, "grad_norm": 2.121596574783325, "learning_rate": 0.0001879671251926525, "loss": 1.7183, "step": 5111 }, { "epoch": 0.1830716063530718, "grad_norm": 1.6858540773391724, "learning_rate": 0.00018796160831610655, "loss": 1.7874, "step": 5112 }, { "epoch": 0.18310741848264006, "grad_norm": 1.651357889175415, "learning_rate": 0.00018795609025615062, "loss": 1.6096, "step": 5113 }, { "epoch": 0.18314323061220836, "grad_norm": 1.636696457862854, "learning_rate": 0.00018795057101285895, "loss": 1.7267, "step": 5114 }, { "epoch": 0.18317904274177663, "grad_norm": 2.019561529159546, "learning_rate": 0.0001879450505863058, "loss": 1.5251, "step": 5115 }, { "epoch": 0.18321485487134492, "grad_norm": 1.834036946296692, "learning_rate": 0.00018793952897656544, "loss": 1.4337, "step": 5116 }, { "epoch": 0.18325066700091322, "grad_norm": 1.3989968299865723, "learning_rate": 0.00018793400618371213, "loss": 1.2123, "step": 5117 }, { "epoch": 0.1832864791304815, "grad_norm": 1.2780959606170654, "learning_rate": 0.0001879284822078202, "loss": 1.4671, "step": 5118 }, { "epoch": 0.18332229126004979, "grad_norm": 2.15604567527771, "learning_rate": 0.00018792295704896396, "loss": 1.5538, "step": 5119 }, { "epoch": 0.18335810338961805, "grad_norm": 1.4170936346054077, "learning_rate": 0.00018791743070721776, "loss": 1.9131, "step": 5120 }, { "epoch": 0.18339391551918635, "grad_norm": 1.7687115669250488, "learning_rate": 0.0001879119031826559, "loss": 1.6419, "step": 5121 }, { "epoch": 0.18342972764875462, "grad_norm": 1.4007076025009155, "learning_rate": 0.00018790637447535283, "loss": 1.701, "step": 5122 }, { "epoch": 0.18346553977832292, "grad_norm": 1.8551342487335205, "learning_rate": 0.00018790084458538285, "loss": 1.7679, "step": 5123 }, { "epoch": 0.1835013519078912, "grad_norm": 1.933009386062622, "learning_rate": 0.0001878953135128204, "loss": 1.6205, "step": 5124 }, { "epoch": 0.18353716403745948, "grad_norm": 1.9234230518341064, "learning_rate": 0.00018788978125773987, "loss": 1.4022, "step": 5125 }, { "epoch": 0.18357297616702778, "grad_norm": 1.5829157829284668, "learning_rate": 0.0001878842478202157, "loss": 1.4791, "step": 5126 }, { "epoch": 0.18360878829659605, "grad_norm": 1.4500535726547241, "learning_rate": 0.00018787871320032236, "loss": 1.6684, "step": 5127 }, { "epoch": 0.18364460042616434, "grad_norm": 2.3648834228515625, "learning_rate": 0.0001878731773981343, "loss": 1.9178, "step": 5128 }, { "epoch": 0.18368041255573261, "grad_norm": 2.026014566421509, "learning_rate": 0.00018786764041372594, "loss": 1.7238, "step": 5129 }, { "epoch": 0.1837162246853009, "grad_norm": 2.3626532554626465, "learning_rate": 0.00018786210224717184, "loss": 1.6493, "step": 5130 }, { "epoch": 0.1837520368148692, "grad_norm": 1.9708654880523682, "learning_rate": 0.0001878565628985465, "loss": 1.7803, "step": 5131 }, { "epoch": 0.18378784894443748, "grad_norm": 2.1790413856506348, "learning_rate": 0.00018785102236792444, "loss": 1.5954, "step": 5132 }, { "epoch": 0.18382366107400577, "grad_norm": 1.556118369102478, "learning_rate": 0.00018784548065538018, "loss": 1.7467, "step": 5133 }, { "epoch": 0.18385947320357404, "grad_norm": 1.646291971206665, "learning_rate": 0.0001878399377609883, "loss": 1.3901, "step": 5134 }, { "epoch": 0.18389528533314234, "grad_norm": 1.8888112306594849, "learning_rate": 0.00018783439368482335, "loss": 1.8621, "step": 5135 }, { "epoch": 0.1839310974627106, "grad_norm": 1.5839077234268188, "learning_rate": 0.00018782884842695992, "loss": 1.3945, "step": 5136 }, { "epoch": 0.1839669095922789, "grad_norm": 1.7424887418746948, "learning_rate": 0.00018782330198747265, "loss": 1.5983, "step": 5137 }, { "epoch": 0.1840027217218472, "grad_norm": 1.7385514974594116, "learning_rate": 0.0001878177543664361, "loss": 1.8294, "step": 5138 }, { "epoch": 0.18403853385141547, "grad_norm": 1.573832392692566, "learning_rate": 0.00018781220556392497, "loss": 1.6636, "step": 5139 }, { "epoch": 0.18407434598098377, "grad_norm": 2.315880537033081, "learning_rate": 0.00018780665558001388, "loss": 1.6846, "step": 5140 }, { "epoch": 0.18411015811055204, "grad_norm": 1.4497591257095337, "learning_rate": 0.00018780110441477752, "loss": 1.4998, "step": 5141 }, { "epoch": 0.18414597024012033, "grad_norm": 1.6672792434692383, "learning_rate": 0.00018779555206829054, "loss": 1.6283, "step": 5142 }, { "epoch": 0.1841817823696886, "grad_norm": 1.4344167709350586, "learning_rate": 0.00018778999854062765, "loss": 1.3823, "step": 5143 }, { "epoch": 0.1842175944992569, "grad_norm": 1.8563214540481567, "learning_rate": 0.00018778444383186357, "loss": 1.5636, "step": 5144 }, { "epoch": 0.1842534066288252, "grad_norm": 1.5528720617294312, "learning_rate": 0.00018777888794207302, "loss": 1.5238, "step": 5145 }, { "epoch": 0.18428921875839346, "grad_norm": 1.825433373451233, "learning_rate": 0.0001877733308713308, "loss": 1.3471, "step": 5146 }, { "epoch": 0.18432503088796176, "grad_norm": 1.9533745050430298, "learning_rate": 0.00018776777261971162, "loss": 1.5054, "step": 5147 }, { "epoch": 0.18436084301753003, "grad_norm": 2.1287336349487305, "learning_rate": 0.00018776221318729026, "loss": 1.8556, "step": 5148 }, { "epoch": 0.18439665514709833, "grad_norm": 3.0732877254486084, "learning_rate": 0.00018775665257414153, "loss": 1.6433, "step": 5149 }, { "epoch": 0.1844324672766666, "grad_norm": 1.8303265571594238, "learning_rate": 0.00018775109078034022, "loss": 1.4745, "step": 5150 }, { "epoch": 0.1844682794062349, "grad_norm": 1.7558493614196777, "learning_rate": 0.00018774552780596117, "loss": 1.6524, "step": 5151 }, { "epoch": 0.1845040915358032, "grad_norm": 1.354691505432129, "learning_rate": 0.00018773996365107926, "loss": 1.0427, "step": 5152 }, { "epoch": 0.18453990366537146, "grad_norm": 1.8739055395126343, "learning_rate": 0.00018773439831576929, "loss": 1.2968, "step": 5153 }, { "epoch": 0.18457571579493975, "grad_norm": 1.5816646814346313, "learning_rate": 0.00018772883180010616, "loss": 1.5516, "step": 5154 }, { "epoch": 0.18461152792450802, "grad_norm": 1.3845489025115967, "learning_rate": 0.0001877232641041648, "loss": 1.3547, "step": 5155 }, { "epoch": 0.18464734005407632, "grad_norm": 1.0942845344543457, "learning_rate": 0.00018771769522802004, "loss": 1.6058, "step": 5156 }, { "epoch": 0.1846831521836446, "grad_norm": 2.4216606616973877, "learning_rate": 0.00018771212517174686, "loss": 1.8014, "step": 5157 }, { "epoch": 0.18471896431321289, "grad_norm": 2.278639554977417, "learning_rate": 0.00018770655393542012, "loss": 1.574, "step": 5158 }, { "epoch": 0.18475477644278118, "grad_norm": 1.3788586854934692, "learning_rate": 0.0001877009815191149, "loss": 1.4745, "step": 5159 }, { "epoch": 0.18479058857234945, "grad_norm": 1.4675894975662231, "learning_rate": 0.00018769540792290608, "loss": 1.5777, "step": 5160 }, { "epoch": 0.18482640070191775, "grad_norm": 1.862516164779663, "learning_rate": 0.00018768983314686866, "loss": 1.5242, "step": 5161 }, { "epoch": 0.18486221283148602, "grad_norm": 1.9851064682006836, "learning_rate": 0.00018768425719107765, "loss": 1.5826, "step": 5162 }, { "epoch": 0.1848980249610543, "grad_norm": 1.6969146728515625, "learning_rate": 0.00018767868005560806, "loss": 1.6246, "step": 5163 }, { "epoch": 0.18493383709062258, "grad_norm": 1.3387763500213623, "learning_rate": 0.0001876731017405349, "loss": 1.4774, "step": 5164 }, { "epoch": 0.18496964922019088, "grad_norm": 1.9716835021972656, "learning_rate": 0.0001876675222459333, "loss": 1.3885, "step": 5165 }, { "epoch": 0.18500546134975918, "grad_norm": 3.473733901977539, "learning_rate": 0.0001876619415718782, "loss": 1.4541, "step": 5166 }, { "epoch": 0.18504127347932744, "grad_norm": 1.877964973449707, "learning_rate": 0.00018765635971844483, "loss": 1.563, "step": 5167 }, { "epoch": 0.18507708560889574, "grad_norm": 2.445122718811035, "learning_rate": 0.00018765077668570816, "loss": 1.7484, "step": 5168 }, { "epoch": 0.185112897738464, "grad_norm": 1.877567172050476, "learning_rate": 0.00018764519247374336, "loss": 1.5732, "step": 5169 }, { "epoch": 0.1851487098680323, "grad_norm": 2.211268663406372, "learning_rate": 0.00018763960708262557, "loss": 1.684, "step": 5170 }, { "epoch": 0.18518452199760058, "grad_norm": 2.7970688343048096, "learning_rate": 0.0001876340205124299, "loss": 1.4222, "step": 5171 }, { "epoch": 0.18522033412716887, "grad_norm": 1.5880224704742432, "learning_rate": 0.00018762843276323151, "loss": 1.824, "step": 5172 }, { "epoch": 0.18525614625673717, "grad_norm": 1.6042425632476807, "learning_rate": 0.0001876228438351056, "loss": 1.6025, "step": 5173 }, { "epoch": 0.18529195838630544, "grad_norm": 1.544500470161438, "learning_rate": 0.00018761725372812735, "loss": 1.7088, "step": 5174 }, { "epoch": 0.18532777051587374, "grad_norm": 2.324397325515747, "learning_rate": 0.00018761166244237197, "loss": 1.5568, "step": 5175 }, { "epoch": 0.185363582645442, "grad_norm": 3.774132013320923, "learning_rate": 0.00018760606997791468, "loss": 1.2232, "step": 5176 }, { "epoch": 0.1853993947750103, "grad_norm": 1.6885324716567993, "learning_rate": 0.00018760047633483074, "loss": 1.7845, "step": 5177 }, { "epoch": 0.18543520690457857, "grad_norm": 1.3358042240142822, "learning_rate": 0.00018759488151319539, "loss": 1.4645, "step": 5178 }, { "epoch": 0.18547101903414687, "grad_norm": 1.6287620067596436, "learning_rate": 0.00018758928551308385, "loss": 1.5811, "step": 5179 }, { "epoch": 0.18550683116371516, "grad_norm": 1.919089674949646, "learning_rate": 0.0001875836883345715, "loss": 1.4477, "step": 5180 }, { "epoch": 0.18554264329328343, "grad_norm": 1.681052803993225, "learning_rate": 0.00018757808997773358, "loss": 1.3891, "step": 5181 }, { "epoch": 0.18557845542285173, "grad_norm": 1.7119311094284058, "learning_rate": 0.00018757249044264542, "loss": 1.5623, "step": 5182 }, { "epoch": 0.18561426755242, "grad_norm": 1.74424147605896, "learning_rate": 0.00018756688972938239, "loss": 1.5426, "step": 5183 }, { "epoch": 0.1856500796819883, "grad_norm": 1.6085398197174072, "learning_rate": 0.0001875612878380198, "loss": 1.9637, "step": 5184 }, { "epoch": 0.18568589181155656, "grad_norm": 1.232580542564392, "learning_rate": 0.00018755568476863302, "loss": 1.7271, "step": 5185 }, { "epoch": 0.18572170394112486, "grad_norm": 1.6930204629898071, "learning_rate": 0.00018755008052129743, "loss": 1.7984, "step": 5186 }, { "epoch": 0.18575751607069316, "grad_norm": 2.1576590538024902, "learning_rate": 0.00018754447509608847, "loss": 1.378, "step": 5187 }, { "epoch": 0.18579332820026143, "grad_norm": 1.799008846282959, "learning_rate": 0.0001875388684930815, "loss": 1.2764, "step": 5188 }, { "epoch": 0.18582914032982972, "grad_norm": 1.5881048440933228, "learning_rate": 0.00018753326071235197, "loss": 1.6597, "step": 5189 }, { "epoch": 0.185864952459398, "grad_norm": 2.208606719970703, "learning_rate": 0.00018752765175397533, "loss": 1.6626, "step": 5190 }, { "epoch": 0.1859007645889663, "grad_norm": 1.4758398532867432, "learning_rate": 0.00018752204161802706, "loss": 1.6252, "step": 5191 }, { "epoch": 0.18593657671853456, "grad_norm": 1.7125110626220703, "learning_rate": 0.00018751643030458256, "loss": 1.5789, "step": 5192 }, { "epoch": 0.18597238884810285, "grad_norm": 2.279865264892578, "learning_rate": 0.00018751081781371743, "loss": 1.5543, "step": 5193 }, { "epoch": 0.18600820097767115, "grad_norm": 1.559960126876831, "learning_rate": 0.00018750520414550711, "loss": 1.4844, "step": 5194 }, { "epoch": 0.18604401310723942, "grad_norm": 1.2859904766082764, "learning_rate": 0.00018749958930002717, "loss": 1.5597, "step": 5195 }, { "epoch": 0.18607982523680772, "grad_norm": 2.042282819747925, "learning_rate": 0.00018749397327735308, "loss": 1.2362, "step": 5196 }, { "epoch": 0.18611563736637599, "grad_norm": 2.044483184814453, "learning_rate": 0.00018748835607756045, "loss": 1.631, "step": 5197 }, { "epoch": 0.18615144949594428, "grad_norm": 1.2718511819839478, "learning_rate": 0.00018748273770072485, "loss": 1.5658, "step": 5198 }, { "epoch": 0.18618726162551255, "grad_norm": 1.6728475093841553, "learning_rate": 0.00018747711814692185, "loss": 1.897, "step": 5199 }, { "epoch": 0.18622307375508085, "grad_norm": 1.7691491842269897, "learning_rate": 0.00018747149741622706, "loss": 1.6203, "step": 5200 }, { "epoch": 0.18625888588464912, "grad_norm": 1.5065457820892334, "learning_rate": 0.0001874658755087161, "loss": 1.805, "step": 5201 }, { "epoch": 0.1862946980142174, "grad_norm": 1.8144469261169434, "learning_rate": 0.00018746025242446463, "loss": 1.5208, "step": 5202 }, { "epoch": 0.1863305101437857, "grad_norm": 1.7388842105865479, "learning_rate": 0.00018745462816354826, "loss": 1.4172, "step": 5203 }, { "epoch": 0.18636632227335398, "grad_norm": 1.3475241661071777, "learning_rate": 0.0001874490027260427, "loss": 1.6845, "step": 5204 }, { "epoch": 0.18640213440292228, "grad_norm": 2.016415596008301, "learning_rate": 0.0001874433761120236, "loss": 1.4557, "step": 5205 }, { "epoch": 0.18643794653249054, "grad_norm": 1.5078966617584229, "learning_rate": 0.00018743774832156667, "loss": 1.5403, "step": 5206 }, { "epoch": 0.18647375866205884, "grad_norm": 1.3218117952346802, "learning_rate": 0.0001874321193547476, "loss": 1.5534, "step": 5207 }, { "epoch": 0.1865095707916271, "grad_norm": 1.9156447649002075, "learning_rate": 0.00018742648921164215, "loss": 1.4724, "step": 5208 }, { "epoch": 0.1865453829211954, "grad_norm": 2.4503142833709717, "learning_rate": 0.00018742085789232607, "loss": 1.4975, "step": 5209 }, { "epoch": 0.1865811950507637, "grad_norm": 1.653955101966858, "learning_rate": 0.0001874152253968751, "loss": 1.5087, "step": 5210 }, { "epoch": 0.18661700718033197, "grad_norm": 1.6351701021194458, "learning_rate": 0.00018740959172536506, "loss": 1.471, "step": 5211 }, { "epoch": 0.18665281930990027, "grad_norm": 1.942446231842041, "learning_rate": 0.0001874039568778717, "loss": 1.5784, "step": 5212 }, { "epoch": 0.18668863143946854, "grad_norm": 1.5409560203552246, "learning_rate": 0.0001873983208544708, "loss": 1.7789, "step": 5213 }, { "epoch": 0.18672444356903684, "grad_norm": 1.5599496364593506, "learning_rate": 0.00018739268365523828, "loss": 1.3787, "step": 5214 }, { "epoch": 0.1867602556986051, "grad_norm": 2.1475934982299805, "learning_rate": 0.00018738704528024994, "loss": 1.8485, "step": 5215 }, { "epoch": 0.1867960678281734, "grad_norm": 1.8166731595993042, "learning_rate": 0.00018738140572958155, "loss": 1.5655, "step": 5216 }, { "epoch": 0.1868318799577417, "grad_norm": 2.244706630706787, "learning_rate": 0.00018737576500330914, "loss": 1.4576, "step": 5217 }, { "epoch": 0.18686769208730997, "grad_norm": 1.2443066835403442, "learning_rate": 0.00018737012310150847, "loss": 1.3797, "step": 5218 }, { "epoch": 0.18690350421687826, "grad_norm": 1.4576160907745361, "learning_rate": 0.00018736448002425554, "loss": 1.4719, "step": 5219 }, { "epoch": 0.18693931634644653, "grad_norm": 1.2665461301803589, "learning_rate": 0.00018735883577162619, "loss": 1.5914, "step": 5220 }, { "epoch": 0.18697512847601483, "grad_norm": 1.9680484533309937, "learning_rate": 0.0001873531903436964, "loss": 1.7045, "step": 5221 }, { "epoch": 0.1870109406055831, "grad_norm": 1.387949824333191, "learning_rate": 0.00018734754374054207, "loss": 1.4011, "step": 5222 }, { "epoch": 0.1870467527351514, "grad_norm": 1.9003175497055054, "learning_rate": 0.0001873418959622393, "loss": 1.443, "step": 5223 }, { "epoch": 0.1870825648647197, "grad_norm": 1.8332993984222412, "learning_rate": 0.0001873362470088639, "loss": 1.7366, "step": 5224 }, { "epoch": 0.18711837699428796, "grad_norm": 1.9775099754333496, "learning_rate": 0.00018733059688049198, "loss": 1.574, "step": 5225 }, { "epoch": 0.18715418912385626, "grad_norm": 2.4329521656036377, "learning_rate": 0.00018732494557719952, "loss": 1.8489, "step": 5226 }, { "epoch": 0.18719000125342453, "grad_norm": 1.4437566995620728, "learning_rate": 0.00018731929309906254, "loss": 1.4442, "step": 5227 }, { "epoch": 0.18722581338299282, "grad_norm": 1.4024875164031982, "learning_rate": 0.00018731363944615717, "loss": 1.2043, "step": 5228 }, { "epoch": 0.1872616255125611, "grad_norm": 1.692596197128296, "learning_rate": 0.00018730798461855938, "loss": 1.4935, "step": 5229 }, { "epoch": 0.1872974376421294, "grad_norm": 1.579940676689148, "learning_rate": 0.00018730232861634524, "loss": 1.5019, "step": 5230 }, { "epoch": 0.18733324977169769, "grad_norm": 1.8518500328063965, "learning_rate": 0.0001872966714395909, "loss": 1.5695, "step": 5231 }, { "epoch": 0.18736906190126595, "grad_norm": 1.7087852954864502, "learning_rate": 0.00018729101308837245, "loss": 1.844, "step": 5232 }, { "epoch": 0.18740487403083425, "grad_norm": 1.8843986988067627, "learning_rate": 0.000187285353562766, "loss": 1.4705, "step": 5233 }, { "epoch": 0.18744068616040252, "grad_norm": 1.5346555709838867, "learning_rate": 0.00018727969286284776, "loss": 1.4533, "step": 5234 }, { "epoch": 0.18747649828997082, "grad_norm": 1.526907205581665, "learning_rate": 0.0001872740309886938, "loss": 1.6521, "step": 5235 }, { "epoch": 0.18751231041953909, "grad_norm": 2.257167100906372, "learning_rate": 0.00018726836794038035, "loss": 1.4737, "step": 5236 }, { "epoch": 0.18754812254910738, "grad_norm": 2.471599817276001, "learning_rate": 0.00018726270371798357, "loss": 1.5256, "step": 5237 }, { "epoch": 0.18758393467867568, "grad_norm": 1.6534587144851685, "learning_rate": 0.00018725703832157966, "loss": 1.7539, "step": 5238 }, { "epoch": 0.18761974680824395, "grad_norm": 1.419296383857727, "learning_rate": 0.00018725137175124482, "loss": 1.5579, "step": 5239 }, { "epoch": 0.18765555893781224, "grad_norm": 1.8545119762420654, "learning_rate": 0.0001872457040070554, "loss": 1.6909, "step": 5240 }, { "epoch": 0.1876913710673805, "grad_norm": 1.6722489595413208, "learning_rate": 0.0001872400350890875, "loss": 1.4801, "step": 5241 }, { "epoch": 0.1877271831969488, "grad_norm": 1.4976879358291626, "learning_rate": 0.00018723436499741748, "loss": 1.3575, "step": 5242 }, { "epoch": 0.18776299532651708, "grad_norm": 1.9052339792251587, "learning_rate": 0.0001872286937321216, "loss": 1.2981, "step": 5243 }, { "epoch": 0.18779880745608538, "grad_norm": 1.9531632661819458, "learning_rate": 0.00018722302129327618, "loss": 1.4758, "step": 5244 }, { "epoch": 0.18783461958565367, "grad_norm": 1.5285097360610962, "learning_rate": 0.0001872173476809575, "loss": 1.39, "step": 5245 }, { "epoch": 0.18787043171522194, "grad_norm": 2.232374906539917, "learning_rate": 0.00018721167289524195, "loss": 1.7108, "step": 5246 }, { "epoch": 0.18790624384479024, "grad_norm": 1.175122618675232, "learning_rate": 0.0001872059969362058, "loss": 1.578, "step": 5247 }, { "epoch": 0.1879420559743585, "grad_norm": 1.2910648584365845, "learning_rate": 0.00018720031980392544, "loss": 1.4514, "step": 5248 }, { "epoch": 0.1879778681039268, "grad_norm": 2.2122230529785156, "learning_rate": 0.0001871946414984773, "loss": 1.5838, "step": 5249 }, { "epoch": 0.18801368023349507, "grad_norm": 1.6212471723556519, "learning_rate": 0.00018718896201993767, "loss": 1.103, "step": 5250 }, { "epoch": 0.18804949236306337, "grad_norm": 1.6619919538497925, "learning_rate": 0.00018718328136838305, "loss": 1.4881, "step": 5251 }, { "epoch": 0.18808530449263167, "grad_norm": 1.6589463949203491, "learning_rate": 0.00018717759954388986, "loss": 1.4259, "step": 5252 }, { "epoch": 0.18812111662219994, "grad_norm": 1.471097707748413, "learning_rate": 0.00018717191654653452, "loss": 1.6129, "step": 5253 }, { "epoch": 0.18815692875176823, "grad_norm": 1.1274768114089966, "learning_rate": 0.00018716623237639347, "loss": 1.4241, "step": 5254 }, { "epoch": 0.1881927408813365, "grad_norm": 1.8204070329666138, "learning_rate": 0.00018716054703354318, "loss": 1.6479, "step": 5255 }, { "epoch": 0.1882285530109048, "grad_norm": 2.424353837966919, "learning_rate": 0.0001871548605180602, "loss": 1.5044, "step": 5256 }, { "epoch": 0.18826436514047307, "grad_norm": 1.7272015810012817, "learning_rate": 0.00018714917283002094, "loss": 1.9286, "step": 5257 }, { "epoch": 0.18830017727004136, "grad_norm": 1.4467058181762695, "learning_rate": 0.000187143483969502, "loss": 1.7475, "step": 5258 }, { "epoch": 0.18833598939960966, "grad_norm": 1.6046894788742065, "learning_rate": 0.00018713779393657993, "loss": 1.5029, "step": 5259 }, { "epoch": 0.18837180152917793, "grad_norm": 1.3539988994598389, "learning_rate": 0.00018713210273133118, "loss": 1.5756, "step": 5260 }, { "epoch": 0.18840761365874623, "grad_norm": 1.6449440717697144, "learning_rate": 0.00018712641035383243, "loss": 1.4489, "step": 5261 }, { "epoch": 0.1884434257883145, "grad_norm": 1.5281622409820557, "learning_rate": 0.00018712071680416017, "loss": 1.6258, "step": 5262 }, { "epoch": 0.1884792379178828, "grad_norm": 1.7914692163467407, "learning_rate": 0.00018711502208239108, "loss": 1.2722, "step": 5263 }, { "epoch": 0.18851505004745106, "grad_norm": 1.9788846969604492, "learning_rate": 0.0001871093261886017, "loss": 1.3999, "step": 5264 }, { "epoch": 0.18855086217701936, "grad_norm": 1.66983962059021, "learning_rate": 0.00018710362912286872, "loss": 1.7335, "step": 5265 }, { "epoch": 0.18858667430658765, "grad_norm": 2.1798553466796875, "learning_rate": 0.00018709793088526877, "loss": 1.5971, "step": 5266 }, { "epoch": 0.18862248643615592, "grad_norm": 2.0857560634613037, "learning_rate": 0.0001870922314758785, "loss": 1.4487, "step": 5267 }, { "epoch": 0.18865829856572422, "grad_norm": 1.8404710292816162, "learning_rate": 0.0001870865308947746, "loss": 1.5343, "step": 5268 }, { "epoch": 0.1886941106952925, "grad_norm": 1.9581575393676758, "learning_rate": 0.00018708082914203376, "loss": 1.4325, "step": 5269 }, { "epoch": 0.18872992282486079, "grad_norm": 1.5912389755249023, "learning_rate": 0.0001870751262177327, "loss": 1.503, "step": 5270 }, { "epoch": 0.18876573495442905, "grad_norm": 2.4967665672302246, "learning_rate": 0.00018706942212194812, "loss": 1.4733, "step": 5271 }, { "epoch": 0.18880154708399735, "grad_norm": 1.365278959274292, "learning_rate": 0.0001870637168547568, "loss": 1.6569, "step": 5272 }, { "epoch": 0.18883735921356565, "grad_norm": 1.912571668624878, "learning_rate": 0.00018705801041623546, "loss": 1.7692, "step": 5273 }, { "epoch": 0.18887317134313392, "grad_norm": 2.0421035289764404, "learning_rate": 0.0001870523028064609, "loss": 1.4651, "step": 5274 }, { "epoch": 0.1889089834727022, "grad_norm": 1.6724461317062378, "learning_rate": 0.00018704659402550986, "loss": 1.6736, "step": 5275 }, { "epoch": 0.18894479560227048, "grad_norm": 2.037821054458618, "learning_rate": 0.0001870408840734592, "loss": 1.492, "step": 5276 }, { "epoch": 0.18898060773183878, "grad_norm": 1.94366455078125, "learning_rate": 0.00018703517295038573, "loss": 1.3209, "step": 5277 }, { "epoch": 0.18901641986140705, "grad_norm": 1.6621423959732056, "learning_rate": 0.00018702946065636623, "loss": 1.4488, "step": 5278 }, { "epoch": 0.18905223199097534, "grad_norm": 2.5038325786590576, "learning_rate": 0.00018702374719147766, "loss": 1.2706, "step": 5279 }, { "epoch": 0.18908804412054364, "grad_norm": 1.4643855094909668, "learning_rate": 0.00018701803255579677, "loss": 1.7987, "step": 5280 }, { "epoch": 0.1891238562501119, "grad_norm": 1.5782333612442017, "learning_rate": 0.00018701231674940054, "loss": 1.7855, "step": 5281 }, { "epoch": 0.1891596683796802, "grad_norm": 1.8929588794708252, "learning_rate": 0.0001870065997723658, "loss": 1.6753, "step": 5282 }, { "epoch": 0.18919548050924848, "grad_norm": 1.534611463546753, "learning_rate": 0.00018700088162476952, "loss": 1.439, "step": 5283 }, { "epoch": 0.18923129263881677, "grad_norm": 1.16856849193573, "learning_rate": 0.00018699516230668856, "loss": 1.6246, "step": 5284 }, { "epoch": 0.18926710476838504, "grad_norm": 1.5402215719223022, "learning_rate": 0.00018698944181819993, "loss": 1.3181, "step": 5285 }, { "epoch": 0.18930291689795334, "grad_norm": 1.398967981338501, "learning_rate": 0.00018698372015938058, "loss": 1.9008, "step": 5286 }, { "epoch": 0.18933872902752164, "grad_norm": 1.6031793355941772, "learning_rate": 0.00018697799733030746, "loss": 1.5932, "step": 5287 }, { "epoch": 0.1893745411570899, "grad_norm": 1.5183573961257935, "learning_rate": 0.00018697227333105756, "loss": 1.6072, "step": 5288 }, { "epoch": 0.1894103532866582, "grad_norm": 2.281196117401123, "learning_rate": 0.00018696654816170795, "loss": 1.6894, "step": 5289 }, { "epoch": 0.18944616541622647, "grad_norm": 1.6974093914031982, "learning_rate": 0.0001869608218223356, "loss": 1.2243, "step": 5290 }, { "epoch": 0.18948197754579477, "grad_norm": 1.6468058824539185, "learning_rate": 0.0001869550943130175, "loss": 1.2508, "step": 5291 }, { "epoch": 0.18951778967536304, "grad_norm": 1.3340821266174316, "learning_rate": 0.00018694936563383086, "loss": 1.3316, "step": 5292 }, { "epoch": 0.18955360180493133, "grad_norm": 1.515095591545105, "learning_rate": 0.00018694363578485262, "loss": 1.6026, "step": 5293 }, { "epoch": 0.18958941393449963, "grad_norm": 1.9505254030227661, "learning_rate": 0.00018693790476615992, "loss": 1.5093, "step": 5294 }, { "epoch": 0.1896252260640679, "grad_norm": 1.6551986932754517, "learning_rate": 0.00018693217257782985, "loss": 1.2679, "step": 5295 }, { "epoch": 0.1896610381936362, "grad_norm": 1.6577612161636353, "learning_rate": 0.00018692643921993952, "loss": 1.583, "step": 5296 }, { "epoch": 0.18969685032320446, "grad_norm": 1.423334002494812, "learning_rate": 0.0001869207046925661, "loss": 1.508, "step": 5297 }, { "epoch": 0.18973266245277276, "grad_norm": 1.5805872678756714, "learning_rate": 0.0001869149689957867, "loss": 1.7373, "step": 5298 }, { "epoch": 0.18976847458234103, "grad_norm": 1.982264518737793, "learning_rate": 0.0001869092321296785, "loss": 1.4873, "step": 5299 }, { "epoch": 0.18980428671190933, "grad_norm": 1.3803657293319702, "learning_rate": 0.00018690349409431872, "loss": 1.347, "step": 5300 }, { "epoch": 0.1898400988414776, "grad_norm": 1.725968360900879, "learning_rate": 0.00018689775488978452, "loss": 1.6085, "step": 5301 }, { "epoch": 0.1898759109710459, "grad_norm": 2.5847837924957275, "learning_rate": 0.0001868920145161531, "loss": 1.5907, "step": 5302 }, { "epoch": 0.1899117231006142, "grad_norm": 1.841369390487671, "learning_rate": 0.0001868862729735017, "loss": 1.8148, "step": 5303 }, { "epoch": 0.18994753523018246, "grad_norm": 2.197054147720337, "learning_rate": 0.00018688053026190757, "loss": 1.2587, "step": 5304 }, { "epoch": 0.18998334735975075, "grad_norm": 1.7132951021194458, "learning_rate": 0.000186874786381448, "loss": 1.5339, "step": 5305 }, { "epoch": 0.19001915948931902, "grad_norm": 1.6471575498580933, "learning_rate": 0.0001868690413322002, "loss": 1.5682, "step": 5306 }, { "epoch": 0.19005497161888732, "grad_norm": 1.9123592376708984, "learning_rate": 0.00018686329511424153, "loss": 1.7367, "step": 5307 }, { "epoch": 0.1900907837484556, "grad_norm": 2.1285805702209473, "learning_rate": 0.00018685754772764928, "loss": 1.7673, "step": 5308 }, { "epoch": 0.19012659587802389, "grad_norm": 1.352774739265442, "learning_rate": 0.00018685179917250072, "loss": 1.6098, "step": 5309 }, { "epoch": 0.19016240800759218, "grad_norm": 1.3797439336776733, "learning_rate": 0.0001868460494488733, "loss": 1.5094, "step": 5310 }, { "epoch": 0.19019822013716045, "grad_norm": 2.124342918395996, "learning_rate": 0.00018684029855684425, "loss": 1.6705, "step": 5311 }, { "epoch": 0.19023403226672875, "grad_norm": 1.2310181856155396, "learning_rate": 0.00018683454649649103, "loss": 1.4928, "step": 5312 }, { "epoch": 0.19026984439629702, "grad_norm": 1.4215664863586426, "learning_rate": 0.00018682879326789098, "loss": 1.6286, "step": 5313 }, { "epoch": 0.1903056565258653, "grad_norm": 1.8718503713607788, "learning_rate": 0.00018682303887112154, "loss": 1.4652, "step": 5314 }, { "epoch": 0.19034146865543358, "grad_norm": 2.483689308166504, "learning_rate": 0.00018681728330626008, "loss": 1.8905, "step": 5315 }, { "epoch": 0.19037728078500188, "grad_norm": 1.4977774620056152, "learning_rate": 0.00018681152657338404, "loss": 1.8194, "step": 5316 }, { "epoch": 0.19041309291457018, "grad_norm": 1.7027922868728638, "learning_rate": 0.00018680576867257095, "loss": 1.6571, "step": 5317 }, { "epoch": 0.19044890504413844, "grad_norm": 1.6885054111480713, "learning_rate": 0.00018680000960389818, "loss": 1.4111, "step": 5318 }, { "epoch": 0.19048471717370674, "grad_norm": 1.3242998123168945, "learning_rate": 0.00018679424936744323, "loss": 1.3061, "step": 5319 }, { "epoch": 0.190520529303275, "grad_norm": 2.3705389499664307, "learning_rate": 0.00018678848796328362, "loss": 1.2376, "step": 5320 }, { "epoch": 0.1905563414328433, "grad_norm": 1.4017654657363892, "learning_rate": 0.00018678272539149687, "loss": 1.2629, "step": 5321 }, { "epoch": 0.19059215356241158, "grad_norm": 1.6854712963104248, "learning_rate": 0.00018677696165216048, "loss": 1.6078, "step": 5322 }, { "epoch": 0.19062796569197987, "grad_norm": 1.8608250617980957, "learning_rate": 0.000186771196745352, "loss": 1.7155, "step": 5323 }, { "epoch": 0.19066377782154817, "grad_norm": 1.982079029083252, "learning_rate": 0.000186765430671149, "loss": 1.529, "step": 5324 }, { "epoch": 0.19069958995111644, "grad_norm": 1.904109239578247, "learning_rate": 0.00018675966342962904, "loss": 1.5579, "step": 5325 }, { "epoch": 0.19073540208068474, "grad_norm": 1.7409393787384033, "learning_rate": 0.00018675389502086976, "loss": 1.6718, "step": 5326 }, { "epoch": 0.190771214210253, "grad_norm": 1.7952065467834473, "learning_rate": 0.00018674812544494865, "loss": 1.6698, "step": 5327 }, { "epoch": 0.1908070263398213, "grad_norm": 2.428290367126465, "learning_rate": 0.00018674235470194348, "loss": 1.7883, "step": 5328 }, { "epoch": 0.19084283846938957, "grad_norm": 1.6777397394180298, "learning_rate": 0.0001867365827919318, "loss": 1.6194, "step": 5329 }, { "epoch": 0.19087865059895787, "grad_norm": 2.2093400955200195, "learning_rate": 0.00018673080971499126, "loss": 1.4838, "step": 5330 }, { "epoch": 0.19091446272852616, "grad_norm": 1.6835341453552246, "learning_rate": 0.00018672503547119957, "loss": 1.4022, "step": 5331 }, { "epoch": 0.19095027485809443, "grad_norm": 1.4746954441070557, "learning_rate": 0.00018671926006063442, "loss": 1.2283, "step": 5332 }, { "epoch": 0.19098608698766273, "grad_norm": 2.170846939086914, "learning_rate": 0.00018671348348337343, "loss": 1.8196, "step": 5333 }, { "epoch": 0.191021899117231, "grad_norm": 1.571633219718933, "learning_rate": 0.00018670770573949442, "loss": 1.5886, "step": 5334 }, { "epoch": 0.1910577112467993, "grad_norm": 1.9823776483535767, "learning_rate": 0.00018670192682907505, "loss": 1.6207, "step": 5335 }, { "epoch": 0.19109352337636756, "grad_norm": 1.2739793062210083, "learning_rate": 0.00018669614675219308, "loss": 1.5255, "step": 5336 }, { "epoch": 0.19112933550593586, "grad_norm": 1.8321658372879028, "learning_rate": 0.0001866903655089263, "loss": 1.7024, "step": 5337 }, { "epoch": 0.19116514763550416, "grad_norm": 1.5952870845794678, "learning_rate": 0.00018668458309935247, "loss": 1.5503, "step": 5338 }, { "epoch": 0.19120095976507243, "grad_norm": 1.4116911888122559, "learning_rate": 0.0001866787995235494, "loss": 1.8354, "step": 5339 }, { "epoch": 0.19123677189464072, "grad_norm": 1.6893408298492432, "learning_rate": 0.00018667301478159489, "loss": 1.7062, "step": 5340 }, { "epoch": 0.191272584024209, "grad_norm": 1.6460639238357544, "learning_rate": 0.00018666722887356673, "loss": 1.7274, "step": 5341 }, { "epoch": 0.1913083961537773, "grad_norm": 1.753871202468872, "learning_rate": 0.00018666144179954283, "loss": 2.059, "step": 5342 }, { "epoch": 0.19134420828334556, "grad_norm": 1.788332223892212, "learning_rate": 0.00018665565355960103, "loss": 1.2346, "step": 5343 }, { "epoch": 0.19138002041291385, "grad_norm": 1.3304243087768555, "learning_rate": 0.0001866498641538192, "loss": 1.7645, "step": 5344 }, { "epoch": 0.19141583254248215, "grad_norm": 1.1889979839324951, "learning_rate": 0.00018664407358227517, "loss": 1.5125, "step": 5345 }, { "epoch": 0.19145164467205042, "grad_norm": 2.2318155765533447, "learning_rate": 0.0001866382818450469, "loss": 1.4744, "step": 5346 }, { "epoch": 0.19148745680161872, "grad_norm": 1.7189171314239502, "learning_rate": 0.00018663248894221232, "loss": 1.466, "step": 5347 }, { "epoch": 0.19152326893118699, "grad_norm": 1.3835846185684204, "learning_rate": 0.00018662669487384936, "loss": 1.6789, "step": 5348 }, { "epoch": 0.19155908106075528, "grad_norm": 2.947704315185547, "learning_rate": 0.00018662089964003594, "loss": 1.5365, "step": 5349 }, { "epoch": 0.19159489319032355, "grad_norm": 1.4203107357025146, "learning_rate": 0.00018661510324085003, "loss": 1.6378, "step": 5350 }, { "epoch": 0.19163070531989185, "grad_norm": 1.2650010585784912, "learning_rate": 0.00018660930567636968, "loss": 1.4558, "step": 5351 }, { "epoch": 0.19166651744946014, "grad_norm": 1.485657811164856, "learning_rate": 0.00018660350694667282, "loss": 1.5388, "step": 5352 }, { "epoch": 0.1917023295790284, "grad_norm": 1.648183822631836, "learning_rate": 0.00018659770705183748, "loss": 1.3599, "step": 5353 }, { "epoch": 0.1917381417085967, "grad_norm": 1.3641210794448853, "learning_rate": 0.00018659190599194168, "loss": 1.336, "step": 5354 }, { "epoch": 0.19177395383816498, "grad_norm": 1.6528867483139038, "learning_rate": 0.0001865861037670635, "loss": 1.5748, "step": 5355 }, { "epoch": 0.19180976596773328, "grad_norm": 1.54402756690979, "learning_rate": 0.00018658030037728098, "loss": 1.495, "step": 5356 }, { "epoch": 0.19184557809730154, "grad_norm": 1.9251580238342285, "learning_rate": 0.00018657449582267218, "loss": 1.5426, "step": 5357 }, { "epoch": 0.19188139022686984, "grad_norm": 2.060964345932007, "learning_rate": 0.00018656869010331523, "loss": 1.7168, "step": 5358 }, { "epoch": 0.19191720235643814, "grad_norm": 1.657238483428955, "learning_rate": 0.00018656288321928824, "loss": 1.2452, "step": 5359 }, { "epoch": 0.1919530144860064, "grad_norm": 1.5768673419952393, "learning_rate": 0.0001865570751706693, "loss": 1.5858, "step": 5360 }, { "epoch": 0.1919888266155747, "grad_norm": 1.583263635635376, "learning_rate": 0.00018655126595753654, "loss": 1.2867, "step": 5361 }, { "epoch": 0.19202463874514297, "grad_norm": 1.5505911111831665, "learning_rate": 0.00018654545557996816, "loss": 1.5733, "step": 5362 }, { "epoch": 0.19206045087471127, "grad_norm": 1.305277705192566, "learning_rate": 0.0001865396440380423, "loss": 1.6863, "step": 5363 }, { "epoch": 0.19209626300427954, "grad_norm": 2.091987133026123, "learning_rate": 0.00018653383133183718, "loss": 1.9752, "step": 5364 }, { "epoch": 0.19213207513384783, "grad_norm": 1.7626006603240967, "learning_rate": 0.00018652801746143097, "loss": 1.6712, "step": 5365 }, { "epoch": 0.19216788726341613, "grad_norm": 1.4710909128189087, "learning_rate": 0.00018652220242690187, "loss": 1.7792, "step": 5366 }, { "epoch": 0.1922036993929844, "grad_norm": 1.8461323976516724, "learning_rate": 0.00018651638622832817, "loss": 1.3979, "step": 5367 }, { "epoch": 0.1922395115225527, "grad_norm": 1.7423018217086792, "learning_rate": 0.00018651056886578808, "loss": 1.5811, "step": 5368 }, { "epoch": 0.19227532365212097, "grad_norm": 2.2182414531707764, "learning_rate": 0.00018650475033935992, "loss": 1.6277, "step": 5369 }, { "epoch": 0.19231113578168926, "grad_norm": 1.7264537811279297, "learning_rate": 0.00018649893064912187, "loss": 1.6134, "step": 5370 }, { "epoch": 0.19234694791125753, "grad_norm": 2.0035877227783203, "learning_rate": 0.00018649310979515228, "loss": 1.6803, "step": 5371 }, { "epoch": 0.19238276004082583, "grad_norm": 1.4782098531723022, "learning_rate": 0.0001864872877775295, "loss": 1.7665, "step": 5372 }, { "epoch": 0.19241857217039413, "grad_norm": 1.79098641872406, "learning_rate": 0.00018648146459633182, "loss": 1.3728, "step": 5373 }, { "epoch": 0.1924543842999624, "grad_norm": 1.7012203931808472, "learning_rate": 0.00018647564025163756, "loss": 1.6528, "step": 5374 }, { "epoch": 0.1924901964295307, "grad_norm": 1.1748199462890625, "learning_rate": 0.00018646981474352515, "loss": 1.5076, "step": 5375 }, { "epoch": 0.19252600855909896, "grad_norm": 1.2763925790786743, "learning_rate": 0.0001864639880720729, "loss": 1.4572, "step": 5376 }, { "epoch": 0.19256182068866726, "grad_norm": 1.897787094116211, "learning_rate": 0.0001864581602373592, "loss": 1.6753, "step": 5377 }, { "epoch": 0.19259763281823553, "grad_norm": 1.900539755821228, "learning_rate": 0.00018645233123946252, "loss": 1.3106, "step": 5378 }, { "epoch": 0.19263344494780382, "grad_norm": 1.413638949394226, "learning_rate": 0.0001864465010784612, "loss": 1.4018, "step": 5379 }, { "epoch": 0.19266925707737212, "grad_norm": 1.6315624713897705, "learning_rate": 0.00018644066975443373, "loss": 1.6498, "step": 5380 }, { "epoch": 0.1927050692069404, "grad_norm": 1.668734073638916, "learning_rate": 0.00018643483726745857, "loss": 1.2368, "step": 5381 }, { "epoch": 0.19274088133650868, "grad_norm": 2.507664203643799, "learning_rate": 0.00018642900361761413, "loss": 1.5805, "step": 5382 }, { "epoch": 0.19277669346607695, "grad_norm": 2.0644752979278564, "learning_rate": 0.00018642316880497893, "loss": 1.6608, "step": 5383 }, { "epoch": 0.19281250559564525, "grad_norm": 2.1488327980041504, "learning_rate": 0.00018641733282963153, "loss": 1.9823, "step": 5384 }, { "epoch": 0.19284831772521352, "grad_norm": 1.6034685373306274, "learning_rate": 0.00018641149569165034, "loss": 1.5834, "step": 5385 }, { "epoch": 0.19288412985478182, "grad_norm": 1.9179775714874268, "learning_rate": 0.00018640565739111393, "loss": 1.534, "step": 5386 }, { "epoch": 0.1929199419843501, "grad_norm": 1.6921823024749756, "learning_rate": 0.0001863998179281009, "loss": 1.5869, "step": 5387 }, { "epoch": 0.19295575411391838, "grad_norm": 1.5110647678375244, "learning_rate": 0.0001863939773026897, "loss": 1.4647, "step": 5388 }, { "epoch": 0.19299156624348668, "grad_norm": 1.7196460962295532, "learning_rate": 0.00018638813551495901, "loss": 1.7801, "step": 5389 }, { "epoch": 0.19302737837305495, "grad_norm": 1.4753451347351074, "learning_rate": 0.0001863822925649874, "loss": 1.8287, "step": 5390 }, { "epoch": 0.19306319050262324, "grad_norm": 1.5066410303115845, "learning_rate": 0.00018637644845285344, "loss": 1.4221, "step": 5391 }, { "epoch": 0.1930990026321915, "grad_norm": 1.68746817111969, "learning_rate": 0.00018637060317863583, "loss": 1.9171, "step": 5392 }, { "epoch": 0.1931348147617598, "grad_norm": 1.6595805883407593, "learning_rate": 0.00018636475674241315, "loss": 1.6928, "step": 5393 }, { "epoch": 0.1931706268913281, "grad_norm": 1.706078290939331, "learning_rate": 0.00018635890914426404, "loss": 1.618, "step": 5394 }, { "epoch": 0.19320643902089638, "grad_norm": 1.5801364183425903, "learning_rate": 0.00018635306038426724, "loss": 1.5143, "step": 5395 }, { "epoch": 0.19324225115046467, "grad_norm": 2.3066155910491943, "learning_rate": 0.00018634721046250139, "loss": 1.4796, "step": 5396 }, { "epoch": 0.19327806328003294, "grad_norm": 1.6973354816436768, "learning_rate": 0.00018634135937904518, "loss": 1.7408, "step": 5397 }, { "epoch": 0.19331387540960124, "grad_norm": 1.4387764930725098, "learning_rate": 0.00018633550713397737, "loss": 1.4763, "step": 5398 }, { "epoch": 0.1933496875391695, "grad_norm": 1.51828134059906, "learning_rate": 0.00018632965372737665, "loss": 1.71, "step": 5399 }, { "epoch": 0.1933854996687378, "grad_norm": 2.4643683433532715, "learning_rate": 0.00018632379915932185, "loss": 1.6933, "step": 5400 }, { "epoch": 0.19342131179830607, "grad_norm": 1.6648520231246948, "learning_rate": 0.00018631794342989163, "loss": 1.2457, "step": 5401 }, { "epoch": 0.19345712392787437, "grad_norm": 1.4097614288330078, "learning_rate": 0.00018631208653916486, "loss": 1.503, "step": 5402 }, { "epoch": 0.19349293605744267, "grad_norm": 1.4005558490753174, "learning_rate": 0.0001863062284872203, "loss": 1.2093, "step": 5403 }, { "epoch": 0.19352874818701093, "grad_norm": 1.770641803741455, "learning_rate": 0.00018630036927413672, "loss": 1.409, "step": 5404 }, { "epoch": 0.19356456031657923, "grad_norm": 1.9547629356384277, "learning_rate": 0.00018629450889999302, "loss": 1.5299, "step": 5405 }, { "epoch": 0.1936003724461475, "grad_norm": 1.4061657190322876, "learning_rate": 0.000186288647364868, "loss": 1.4272, "step": 5406 }, { "epoch": 0.1936361845757158, "grad_norm": 2.2210681438446045, "learning_rate": 0.00018628278466884055, "loss": 1.4814, "step": 5407 }, { "epoch": 0.19367199670528407, "grad_norm": 1.4909852743148804, "learning_rate": 0.00018627692081198954, "loss": 1.4706, "step": 5408 }, { "epoch": 0.19370780883485236, "grad_norm": 1.9401264190673828, "learning_rate": 0.00018627105579439382, "loss": 1.2785, "step": 5409 }, { "epoch": 0.19374362096442066, "grad_norm": 1.8306653499603271, "learning_rate": 0.00018626518961613236, "loss": 1.3549, "step": 5410 }, { "epoch": 0.19377943309398893, "grad_norm": 1.7504512071609497, "learning_rate": 0.00018625932227728402, "loss": 1.6292, "step": 5411 }, { "epoch": 0.19381524522355723, "grad_norm": 1.2952179908752441, "learning_rate": 0.00018625345377792777, "loss": 1.5527, "step": 5412 }, { "epoch": 0.1938510573531255, "grad_norm": 1.9367178678512573, "learning_rate": 0.0001862475841181426, "loss": 1.611, "step": 5413 }, { "epoch": 0.1938868694826938, "grad_norm": 2.24019455909729, "learning_rate": 0.00018624171329800738, "loss": 1.7476, "step": 5414 }, { "epoch": 0.19392268161226206, "grad_norm": 1.6576284170150757, "learning_rate": 0.00018623584131760118, "loss": 1.7381, "step": 5415 }, { "epoch": 0.19395849374183036, "grad_norm": 1.9396398067474365, "learning_rate": 0.00018622996817700295, "loss": 1.5697, "step": 5416 }, { "epoch": 0.19399430587139865, "grad_norm": 1.9461170434951782, "learning_rate": 0.00018622409387629175, "loss": 1.4934, "step": 5417 }, { "epoch": 0.19403011800096692, "grad_norm": 1.8357774019241333, "learning_rate": 0.0001862182184155466, "loss": 1.7385, "step": 5418 }, { "epoch": 0.19406593013053522, "grad_norm": 1.4890779256820679, "learning_rate": 0.00018621234179484647, "loss": 1.5282, "step": 5419 }, { "epoch": 0.1941017422601035, "grad_norm": 2.412010431289673, "learning_rate": 0.00018620646401427054, "loss": 1.7779, "step": 5420 }, { "epoch": 0.19413755438967178, "grad_norm": 2.704232692718506, "learning_rate": 0.00018620058507389783, "loss": 1.5965, "step": 5421 }, { "epoch": 0.19417336651924005, "grad_norm": 1.7113075256347656, "learning_rate": 0.00018619470497380745, "loss": 1.7815, "step": 5422 }, { "epoch": 0.19420917864880835, "grad_norm": 1.8636949062347412, "learning_rate": 0.00018618882371407847, "loss": 1.6106, "step": 5423 }, { "epoch": 0.19424499077837665, "grad_norm": 1.3164920806884766, "learning_rate": 0.00018618294129479007, "loss": 1.3721, "step": 5424 }, { "epoch": 0.19428080290794492, "grad_norm": 1.5221353769302368, "learning_rate": 0.00018617705771602132, "loss": 1.4453, "step": 5425 }, { "epoch": 0.1943166150375132, "grad_norm": 2.4712367057800293, "learning_rate": 0.00018617117297785145, "loss": 1.9291, "step": 5426 }, { "epoch": 0.19435242716708148, "grad_norm": 1.8102853298187256, "learning_rate": 0.00018616528708035958, "loss": 1.6105, "step": 5427 }, { "epoch": 0.19438823929664978, "grad_norm": 1.8387972116470337, "learning_rate": 0.00018615940002362496, "loss": 1.4088, "step": 5428 }, { "epoch": 0.19442405142621805, "grad_norm": 1.8663088083267212, "learning_rate": 0.0001861535118077267, "loss": 1.5058, "step": 5429 }, { "epoch": 0.19445986355578634, "grad_norm": 2.4539215564727783, "learning_rate": 0.0001861476224327441, "loss": 1.8223, "step": 5430 }, { "epoch": 0.19449567568535464, "grad_norm": 1.7828904390335083, "learning_rate": 0.00018614173189875636, "loss": 1.4433, "step": 5431 }, { "epoch": 0.1945314878149229, "grad_norm": 2.0361180305480957, "learning_rate": 0.0001861358402058427, "loss": 1.5065, "step": 5432 }, { "epoch": 0.1945672999444912, "grad_norm": 1.5484020709991455, "learning_rate": 0.00018612994735408246, "loss": 1.7755, "step": 5433 }, { "epoch": 0.19460311207405948, "grad_norm": 1.351025938987732, "learning_rate": 0.00018612405334355488, "loss": 1.5675, "step": 5434 }, { "epoch": 0.19463892420362777, "grad_norm": 1.856544852256775, "learning_rate": 0.00018611815817433925, "loss": 1.8301, "step": 5435 }, { "epoch": 0.19467473633319604, "grad_norm": 1.3813837766647339, "learning_rate": 0.00018611226184651484, "loss": 1.7018, "step": 5436 }, { "epoch": 0.19471054846276434, "grad_norm": 1.4931066036224365, "learning_rate": 0.00018610636436016106, "loss": 1.5255, "step": 5437 }, { "epoch": 0.19474636059233263, "grad_norm": 1.5089569091796875, "learning_rate": 0.00018610046571535723, "loss": 1.4895, "step": 5438 }, { "epoch": 0.1947821727219009, "grad_norm": 1.7368457317352295, "learning_rate": 0.00018609456591218266, "loss": 1.1445, "step": 5439 }, { "epoch": 0.1948179848514692, "grad_norm": 1.6913045644760132, "learning_rate": 0.0001860886649507168, "loss": 1.5326, "step": 5440 }, { "epoch": 0.19485379698103747, "grad_norm": 1.6324273347854614, "learning_rate": 0.00018608276283103896, "loss": 1.7677, "step": 5441 }, { "epoch": 0.19488960911060577, "grad_norm": 1.6233586072921753, "learning_rate": 0.0001860768595532286, "loss": 1.1984, "step": 5442 }, { "epoch": 0.19492542124017403, "grad_norm": 1.4947038888931274, "learning_rate": 0.00018607095511736515, "loss": 1.7798, "step": 5443 }, { "epoch": 0.19496123336974233, "grad_norm": 1.6145468950271606, "learning_rate": 0.00018606504952352798, "loss": 1.4282, "step": 5444 }, { "epoch": 0.19499704549931063, "grad_norm": 2.4696900844573975, "learning_rate": 0.00018605914277179664, "loss": 1.8904, "step": 5445 }, { "epoch": 0.1950328576288789, "grad_norm": 1.766887903213501, "learning_rate": 0.00018605323486225049, "loss": 1.6336, "step": 5446 }, { "epoch": 0.1950686697584472, "grad_norm": 1.9337714910507202, "learning_rate": 0.00018604732579496908, "loss": 1.5009, "step": 5447 }, { "epoch": 0.19510448188801546, "grad_norm": 1.4915785789489746, "learning_rate": 0.0001860414155700319, "loss": 1.552, "step": 5448 }, { "epoch": 0.19514029401758376, "grad_norm": 1.450589656829834, "learning_rate": 0.00018603550418751845, "loss": 1.4491, "step": 5449 }, { "epoch": 0.19517610614715203, "grad_norm": 2.733760118484497, "learning_rate": 0.0001860295916475083, "loss": 1.539, "step": 5450 }, { "epoch": 0.19521191827672033, "grad_norm": 1.459010362625122, "learning_rate": 0.00018602367795008093, "loss": 1.7806, "step": 5451 }, { "epoch": 0.19524773040628862, "grad_norm": 1.4586896896362305, "learning_rate": 0.00018601776309531593, "loss": 1.4384, "step": 5452 }, { "epoch": 0.1952835425358569, "grad_norm": 1.6691211462020874, "learning_rate": 0.00018601184708329292, "loss": 1.5412, "step": 5453 }, { "epoch": 0.1953193546654252, "grad_norm": 2.4468443393707275, "learning_rate": 0.00018600592991409141, "loss": 1.5801, "step": 5454 }, { "epoch": 0.19535516679499346, "grad_norm": 2.1524693965911865, "learning_rate": 0.00018600001158779108, "loss": 1.6428, "step": 5455 }, { "epoch": 0.19539097892456175, "grad_norm": 1.6459256410598755, "learning_rate": 0.00018599409210447152, "loss": 1.6684, "step": 5456 }, { "epoch": 0.19542679105413002, "grad_norm": 1.3564072847366333, "learning_rate": 0.0001859881714642124, "loss": 1.6664, "step": 5457 }, { "epoch": 0.19546260318369832, "grad_norm": 1.9026246070861816, "learning_rate": 0.00018598224966709332, "loss": 1.6176, "step": 5458 }, { "epoch": 0.19549841531326662, "grad_norm": 1.4654345512390137, "learning_rate": 0.00018597632671319398, "loss": 1.2719, "step": 5459 }, { "epoch": 0.19553422744283488, "grad_norm": 1.4894424676895142, "learning_rate": 0.0001859704026025941, "loss": 1.519, "step": 5460 }, { "epoch": 0.19557003957240318, "grad_norm": 1.355757713317871, "learning_rate": 0.0001859644773353733, "loss": 1.4525, "step": 5461 }, { "epoch": 0.19560585170197145, "grad_norm": 1.8707811832427979, "learning_rate": 0.00018595855091161137, "loss": 1.4504, "step": 5462 }, { "epoch": 0.19564166383153975, "grad_norm": 1.3696109056472778, "learning_rate": 0.00018595262333138802, "loss": 1.7364, "step": 5463 }, { "epoch": 0.19567747596110802, "grad_norm": 1.756722092628479, "learning_rate": 0.000185946694594783, "loss": 1.9198, "step": 5464 }, { "epoch": 0.1957132880906763, "grad_norm": 1.6089026927947998, "learning_rate": 0.000185940764701876, "loss": 1.4237, "step": 5465 }, { "epoch": 0.1957491002202446, "grad_norm": 2.058664560317993, "learning_rate": 0.00018593483365274694, "loss": 1.4842, "step": 5466 }, { "epoch": 0.19578491234981288, "grad_norm": 1.3057408332824707, "learning_rate": 0.00018592890144747553, "loss": 1.4789, "step": 5467 }, { "epoch": 0.19582072447938118, "grad_norm": 2.4167938232421875, "learning_rate": 0.00018592296808614156, "loss": 1.9637, "step": 5468 }, { "epoch": 0.19585653660894944, "grad_norm": 1.7090920209884644, "learning_rate": 0.0001859170335688249, "loss": 1.3604, "step": 5469 }, { "epoch": 0.19589234873851774, "grad_norm": 1.6871206760406494, "learning_rate": 0.0001859110978956054, "loss": 1.3423, "step": 5470 }, { "epoch": 0.195928160868086, "grad_norm": 1.3249101638793945, "learning_rate": 0.00018590516106656288, "loss": 1.6105, "step": 5471 }, { "epoch": 0.1959639729976543, "grad_norm": 2.58074688911438, "learning_rate": 0.00018589922308177723, "loss": 1.5327, "step": 5472 }, { "epoch": 0.1959997851272226, "grad_norm": 1.6249611377716064, "learning_rate": 0.0001858932839413283, "loss": 1.4159, "step": 5473 }, { "epoch": 0.19603559725679087, "grad_norm": 1.391710877418518, "learning_rate": 0.0001858873436452961, "loss": 1.2536, "step": 5474 }, { "epoch": 0.19607140938635917, "grad_norm": 1.7863149642944336, "learning_rate": 0.0001858814021937604, "loss": 1.2555, "step": 5475 }, { "epoch": 0.19610722151592744, "grad_norm": 2.6330697536468506, "learning_rate": 0.0001858754595868013, "loss": 1.3862, "step": 5476 }, { "epoch": 0.19614303364549573, "grad_norm": 1.5848230123519897, "learning_rate": 0.0001858695158244986, "loss": 1.36, "step": 5477 }, { "epoch": 0.196178845775064, "grad_norm": 2.69571852684021, "learning_rate": 0.00018586357090693233, "loss": 2.119, "step": 5478 }, { "epoch": 0.1962146579046323, "grad_norm": 1.6064741611480713, "learning_rate": 0.0001858576248341825, "loss": 1.4919, "step": 5479 }, { "epoch": 0.1962504700342006, "grad_norm": 3.198639154434204, "learning_rate": 0.00018585167760632905, "loss": 1.6805, "step": 5480 }, { "epoch": 0.19628628216376887, "grad_norm": 1.5669358968734741, "learning_rate": 0.00018584572922345202, "loss": 1.42, "step": 5481 }, { "epoch": 0.19632209429333716, "grad_norm": 1.6380369663238525, "learning_rate": 0.00018583977968563144, "loss": 1.4726, "step": 5482 }, { "epoch": 0.19635790642290543, "grad_norm": 1.8254414796829224, "learning_rate": 0.00018583382899294736, "loss": 1.4393, "step": 5483 }, { "epoch": 0.19639371855247373, "grad_norm": 1.700395941734314, "learning_rate": 0.00018582787714547982, "loss": 1.3454, "step": 5484 }, { "epoch": 0.196429530682042, "grad_norm": 1.4598270654678345, "learning_rate": 0.0001858219241433089, "loss": 1.7212, "step": 5485 }, { "epoch": 0.1964653428116103, "grad_norm": 1.7634484767913818, "learning_rate": 0.0001858159699865147, "loss": 1.47, "step": 5486 }, { "epoch": 0.1965011549411786, "grad_norm": 1.5036978721618652, "learning_rate": 0.00018581001467517734, "loss": 1.8402, "step": 5487 }, { "epoch": 0.19653696707074686, "grad_norm": 2.055551767349243, "learning_rate": 0.00018580405820937688, "loss": 2.0814, "step": 5488 }, { "epoch": 0.19657277920031516, "grad_norm": 1.3088536262512207, "learning_rate": 0.0001857981005891935, "loss": 1.5177, "step": 5489 }, { "epoch": 0.19660859132988343, "grad_norm": 1.9697926044464111, "learning_rate": 0.00018579214181470736, "loss": 1.3903, "step": 5490 }, { "epoch": 0.19664440345945172, "grad_norm": 1.503617286682129, "learning_rate": 0.00018578618188599863, "loss": 1.5119, "step": 5491 }, { "epoch": 0.19668021558902, "grad_norm": 2.178367853164673, "learning_rate": 0.00018578022080314747, "loss": 1.3386, "step": 5492 }, { "epoch": 0.1967160277185883, "grad_norm": 1.7492157220840454, "learning_rate": 0.00018577425856623408, "loss": 1.5961, "step": 5493 }, { "epoch": 0.19675183984815658, "grad_norm": 1.7134050130844116, "learning_rate": 0.00018576829517533868, "loss": 1.3217, "step": 5494 }, { "epoch": 0.19678765197772485, "grad_norm": 1.4412091970443726, "learning_rate": 0.00018576233063054151, "loss": 1.5938, "step": 5495 }, { "epoch": 0.19682346410729315, "grad_norm": 1.636104702949524, "learning_rate": 0.00018575636493192282, "loss": 1.8974, "step": 5496 }, { "epoch": 0.19685927623686142, "grad_norm": 1.2458416223526, "learning_rate": 0.00018575039807956282, "loss": 1.6478, "step": 5497 }, { "epoch": 0.19689508836642972, "grad_norm": 2.1648898124694824, "learning_rate": 0.00018574443007354186, "loss": 1.4979, "step": 5498 }, { "epoch": 0.19693090049599798, "grad_norm": 1.4280565977096558, "learning_rate": 0.00018573846091394017, "loss": 1.4956, "step": 5499 }, { "epoch": 0.19696671262556628, "grad_norm": 1.6333988904953003, "learning_rate": 0.00018573249060083812, "loss": 1.5715, "step": 5500 }, { "epoch": 0.19700252475513455, "grad_norm": 1.6430987119674683, "learning_rate": 0.00018572651913431596, "loss": 1.699, "step": 5501 }, { "epoch": 0.19703833688470285, "grad_norm": 1.433403730392456, "learning_rate": 0.00018572054651445408, "loss": 1.5459, "step": 5502 }, { "epoch": 0.19707414901427114, "grad_norm": 2.47914981842041, "learning_rate": 0.00018571457274133279, "loss": 1.7359, "step": 5503 }, { "epoch": 0.1971099611438394, "grad_norm": 1.688639760017395, "learning_rate": 0.0001857085978150325, "loss": 1.7368, "step": 5504 }, { "epoch": 0.1971457732734077, "grad_norm": 1.4147371053695679, "learning_rate": 0.0001857026217356336, "loss": 1.4653, "step": 5505 }, { "epoch": 0.19718158540297598, "grad_norm": 1.7496812343597412, "learning_rate": 0.00018569664450321645, "loss": 1.5381, "step": 5506 }, { "epoch": 0.19721739753254428, "grad_norm": 1.178428292274475, "learning_rate": 0.00018569066611786152, "loss": 1.559, "step": 5507 }, { "epoch": 0.19725320966211254, "grad_norm": 1.813152551651001, "learning_rate": 0.00018568468657964918, "loss": 1.4128, "step": 5508 }, { "epoch": 0.19728902179168084, "grad_norm": 1.3909872770309448, "learning_rate": 0.00018567870588865994, "loss": 1.6463, "step": 5509 }, { "epoch": 0.19732483392124914, "grad_norm": 1.5748597383499146, "learning_rate": 0.0001856727240449742, "loss": 1.7144, "step": 5510 }, { "epoch": 0.1973606460508174, "grad_norm": 1.5605316162109375, "learning_rate": 0.0001856667410486725, "loss": 1.8678, "step": 5511 }, { "epoch": 0.1973964581803857, "grad_norm": 1.2146022319793701, "learning_rate": 0.00018566075689983527, "loss": 1.7029, "step": 5512 }, { "epoch": 0.19743227030995397, "grad_norm": 1.2470415830612183, "learning_rate": 0.00018565477159854306, "loss": 1.7266, "step": 5513 }, { "epoch": 0.19746808243952227, "grad_norm": 1.8456315994262695, "learning_rate": 0.00018564878514487637, "loss": 1.4474, "step": 5514 }, { "epoch": 0.19750389456909054, "grad_norm": 1.6164729595184326, "learning_rate": 0.0001856427975389158, "loss": 1.8401, "step": 5515 }, { "epoch": 0.19753970669865883, "grad_norm": 2.2162938117980957, "learning_rate": 0.00018563680878074182, "loss": 1.4896, "step": 5516 }, { "epoch": 0.19757551882822713, "grad_norm": 1.71847403049469, "learning_rate": 0.00018563081887043505, "loss": 1.5033, "step": 5517 }, { "epoch": 0.1976113309577954, "grad_norm": 1.6429033279418945, "learning_rate": 0.00018562482780807606, "loss": 1.4215, "step": 5518 }, { "epoch": 0.1976471430873637, "grad_norm": 1.78325355052948, "learning_rate": 0.00018561883559374548, "loss": 1.4578, "step": 5519 }, { "epoch": 0.19768295521693197, "grad_norm": 2.1721127033233643, "learning_rate": 0.0001856128422275239, "loss": 1.6051, "step": 5520 }, { "epoch": 0.19771876734650026, "grad_norm": 1.7439571619033813, "learning_rate": 0.00018560684770949198, "loss": 1.6344, "step": 5521 }, { "epoch": 0.19775457947606853, "grad_norm": 2.096254348754883, "learning_rate": 0.0001856008520397303, "loss": 1.6342, "step": 5522 }, { "epoch": 0.19779039160563683, "grad_norm": 2.046941041946411, "learning_rate": 0.00018559485521831958, "loss": 1.6274, "step": 5523 }, { "epoch": 0.19782620373520513, "grad_norm": 1.634363055229187, "learning_rate": 0.00018558885724534054, "loss": 1.4636, "step": 5524 }, { "epoch": 0.1978620158647734, "grad_norm": 1.593102216720581, "learning_rate": 0.00018558285812087378, "loss": 1.6564, "step": 5525 }, { "epoch": 0.1978978279943417, "grad_norm": 1.5818394422531128, "learning_rate": 0.0001855768578450001, "loss": 1.6222, "step": 5526 }, { "epoch": 0.19793364012390996, "grad_norm": 1.5421195030212402, "learning_rate": 0.00018557085641780018, "loss": 1.6816, "step": 5527 }, { "epoch": 0.19796945225347826, "grad_norm": 1.7133044004440308, "learning_rate": 0.0001855648538393547, "loss": 1.7907, "step": 5528 }, { "epoch": 0.19800526438304653, "grad_norm": 3.0813517570495605, "learning_rate": 0.00018555885010974454, "loss": 1.4835, "step": 5529 }, { "epoch": 0.19804107651261482, "grad_norm": 2.0917038917541504, "learning_rate": 0.00018555284522905042, "loss": 1.6455, "step": 5530 }, { "epoch": 0.19807688864218312, "grad_norm": 1.5909149646759033, "learning_rate": 0.00018554683919735313, "loss": 1.6325, "step": 5531 }, { "epoch": 0.1981127007717514, "grad_norm": 1.382629156112671, "learning_rate": 0.0001855408320147334, "loss": 1.5228, "step": 5532 }, { "epoch": 0.19814851290131968, "grad_norm": 1.8413569927215576, "learning_rate": 0.00018553482368127217, "loss": 1.6239, "step": 5533 }, { "epoch": 0.19818432503088795, "grad_norm": 1.6792410612106323, "learning_rate": 0.0001855288141970502, "loss": 1.5107, "step": 5534 }, { "epoch": 0.19822013716045625, "grad_norm": 1.9996005296707153, "learning_rate": 0.00018552280356214838, "loss": 1.5626, "step": 5535 }, { "epoch": 0.19825594929002452, "grad_norm": 1.782104253768921, "learning_rate": 0.00018551679177664755, "loss": 1.4933, "step": 5536 }, { "epoch": 0.19829176141959282, "grad_norm": 1.5002424716949463, "learning_rate": 0.0001855107788406286, "loss": 1.6503, "step": 5537 }, { "epoch": 0.1983275735491611, "grad_norm": 1.3976023197174072, "learning_rate": 0.0001855047647541724, "loss": 1.5177, "step": 5538 }, { "epoch": 0.19836338567872938, "grad_norm": 1.560597538948059, "learning_rate": 0.00018549874951735988, "loss": 1.7587, "step": 5539 }, { "epoch": 0.19839919780829768, "grad_norm": 1.5787992477416992, "learning_rate": 0.00018549273313027198, "loss": 1.5259, "step": 5540 }, { "epoch": 0.19843500993786595, "grad_norm": 1.5140466690063477, "learning_rate": 0.00018548671559298963, "loss": 1.6429, "step": 5541 }, { "epoch": 0.19847082206743424, "grad_norm": 1.7304792404174805, "learning_rate": 0.00018548069690559383, "loss": 1.6498, "step": 5542 }, { "epoch": 0.1985066341970025, "grad_norm": 2.0761163234710693, "learning_rate": 0.00018547467706816546, "loss": 1.4024, "step": 5543 }, { "epoch": 0.1985424463265708, "grad_norm": 1.838121771812439, "learning_rate": 0.00018546865608078559, "loss": 1.7005, "step": 5544 }, { "epoch": 0.1985782584561391, "grad_norm": 1.4359781742095947, "learning_rate": 0.0001854626339435352, "loss": 1.7055, "step": 5545 }, { "epoch": 0.19861407058570738, "grad_norm": 1.474090576171875, "learning_rate": 0.0001854566106564953, "loss": 1.4552, "step": 5546 }, { "epoch": 0.19864988271527567, "grad_norm": 1.9340765476226807, "learning_rate": 0.00018545058621974693, "loss": 1.5708, "step": 5547 }, { "epoch": 0.19868569484484394, "grad_norm": 1.570233941078186, "learning_rate": 0.00018544456063337116, "loss": 1.4817, "step": 5548 }, { "epoch": 0.19872150697441224, "grad_norm": 1.671566128730774, "learning_rate": 0.00018543853389744905, "loss": 1.593, "step": 5549 }, { "epoch": 0.1987573191039805, "grad_norm": 1.457958459854126, "learning_rate": 0.00018543250601206165, "loss": 1.7928, "step": 5550 }, { "epoch": 0.1987931312335488, "grad_norm": 1.4195009469985962, "learning_rate": 0.00018542647697729009, "loss": 1.4611, "step": 5551 }, { "epoch": 0.1988289433631171, "grad_norm": 1.9061682224273682, "learning_rate": 0.00018542044679321549, "loss": 1.582, "step": 5552 }, { "epoch": 0.19886475549268537, "grad_norm": 1.399892807006836, "learning_rate": 0.00018541441545991892, "loss": 1.5558, "step": 5553 }, { "epoch": 0.19890056762225367, "grad_norm": 1.7510415315628052, "learning_rate": 0.00018540838297748162, "loss": 1.3359, "step": 5554 }, { "epoch": 0.19893637975182193, "grad_norm": 1.2389678955078125, "learning_rate": 0.0001854023493459847, "loss": 1.3077, "step": 5555 }, { "epoch": 0.19897219188139023, "grad_norm": 1.1171600818634033, "learning_rate": 0.00018539631456550927, "loss": 1.1792, "step": 5556 }, { "epoch": 0.1990080040109585, "grad_norm": 1.5652157068252563, "learning_rate": 0.00018539027863613664, "loss": 1.5377, "step": 5557 }, { "epoch": 0.1990438161405268, "grad_norm": 1.341874361038208, "learning_rate": 0.0001853842415579479, "loss": 1.6419, "step": 5558 }, { "epoch": 0.1990796282700951, "grad_norm": 2.2000255584716797, "learning_rate": 0.0001853782033310244, "loss": 1.5106, "step": 5559 }, { "epoch": 0.19911544039966336, "grad_norm": 1.4743818044662476, "learning_rate": 0.00018537216395544723, "loss": 1.5688, "step": 5560 }, { "epoch": 0.19915125252923166, "grad_norm": 2.2175040245056152, "learning_rate": 0.00018536612343129778, "loss": 1.505, "step": 5561 }, { "epoch": 0.19918706465879993, "grad_norm": 1.6958733797073364, "learning_rate": 0.0001853600817586572, "loss": 1.762, "step": 5562 }, { "epoch": 0.19922287678836822, "grad_norm": 1.9428437948226929, "learning_rate": 0.00018535403893760684, "loss": 1.6962, "step": 5563 }, { "epoch": 0.1992586889179365, "grad_norm": 1.8468763828277588, "learning_rate": 0.00018534799496822802, "loss": 1.464, "step": 5564 }, { "epoch": 0.1992945010475048, "grad_norm": 1.587731122970581, "learning_rate": 0.00018534194985060198, "loss": 1.6521, "step": 5565 }, { "epoch": 0.1993303131770731, "grad_norm": 1.6171441078186035, "learning_rate": 0.0001853359035848101, "loss": 1.6767, "step": 5566 }, { "epoch": 0.19936612530664136, "grad_norm": 2.446106195449829, "learning_rate": 0.0001853298561709337, "loss": 1.5723, "step": 5567 }, { "epoch": 0.19940193743620965, "grad_norm": 1.455934762954712, "learning_rate": 0.0001853238076090542, "loss": 1.6693, "step": 5568 }, { "epoch": 0.19943774956577792, "grad_norm": 1.3956012725830078, "learning_rate": 0.00018531775789925288, "loss": 1.5087, "step": 5569 }, { "epoch": 0.19947356169534622, "grad_norm": 1.2480905055999756, "learning_rate": 0.00018531170704161117, "loss": 1.4799, "step": 5570 }, { "epoch": 0.1995093738249145, "grad_norm": 2.1647627353668213, "learning_rate": 0.00018530565503621052, "loss": 1.5694, "step": 5571 }, { "epoch": 0.19954518595448278, "grad_norm": 2.104330062866211, "learning_rate": 0.00018529960188313233, "loss": 1.3568, "step": 5572 }, { "epoch": 0.19958099808405108, "grad_norm": 1.5048644542694092, "learning_rate": 0.000185293547582458, "loss": 1.4253, "step": 5573 }, { "epoch": 0.19961681021361935, "grad_norm": 1.904709815979004, "learning_rate": 0.000185287492134269, "loss": 1.7133, "step": 5574 }, { "epoch": 0.19965262234318765, "grad_norm": 2.1871891021728516, "learning_rate": 0.00018528143553864682, "loss": 1.4204, "step": 5575 }, { "epoch": 0.19968843447275592, "grad_norm": 1.9296875, "learning_rate": 0.00018527537779567294, "loss": 1.8226, "step": 5576 }, { "epoch": 0.1997242466023242, "grad_norm": 1.6656574010849, "learning_rate": 0.00018526931890542882, "loss": 1.4225, "step": 5577 }, { "epoch": 0.19976005873189248, "grad_norm": 1.660765290260315, "learning_rate": 0.00018526325886799601, "loss": 1.3672, "step": 5578 }, { "epoch": 0.19979587086146078, "grad_norm": 1.7996066808700562, "learning_rate": 0.00018525719768345606, "loss": 1.7538, "step": 5579 }, { "epoch": 0.19983168299102907, "grad_norm": 2.2600836753845215, "learning_rate": 0.00018525113535189047, "loss": 2.0307, "step": 5580 }, { "epoch": 0.19986749512059734, "grad_norm": 1.627731204032898, "learning_rate": 0.00018524507187338082, "loss": 1.7304, "step": 5581 }, { "epoch": 0.19990330725016564, "grad_norm": 1.4446642398834229, "learning_rate": 0.00018523900724800872, "loss": 1.4868, "step": 5582 }, { "epoch": 0.1999391193797339, "grad_norm": 1.7544971704483032, "learning_rate": 0.00018523294147585568, "loss": 1.373, "step": 5583 }, { "epoch": 0.1999749315093022, "grad_norm": 1.6773810386657715, "learning_rate": 0.00018522687455700337, "loss": 1.5266, "step": 5584 }, { "epoch": 0.20001074363887048, "grad_norm": 1.7085083723068237, "learning_rate": 0.0001852208064915334, "loss": 1.6517, "step": 5585 }, { "epoch": 0.20004655576843877, "grad_norm": 1.6179802417755127, "learning_rate": 0.00018521473727952742, "loss": 1.577, "step": 5586 }, { "epoch": 0.20008236789800707, "grad_norm": 1.6990835666656494, "learning_rate": 0.00018520866692106703, "loss": 1.7635, "step": 5587 }, { "epoch": 0.20011818002757534, "grad_norm": 1.5353055000305176, "learning_rate": 0.00018520259541623398, "loss": 1.6554, "step": 5588 }, { "epoch": 0.20015399215714363, "grad_norm": 1.858586072921753, "learning_rate": 0.0001851965227651099, "loss": 1.5469, "step": 5589 }, { "epoch": 0.2001898042867119, "grad_norm": 1.5021321773529053, "learning_rate": 0.00018519044896777648, "loss": 1.8475, "step": 5590 }, { "epoch": 0.2002256164162802, "grad_norm": 1.355085015296936, "learning_rate": 0.0001851843740243155, "loss": 1.6449, "step": 5591 }, { "epoch": 0.20026142854584847, "grad_norm": 1.4706250429153442, "learning_rate": 0.00018517829793480861, "loss": 1.6929, "step": 5592 }, { "epoch": 0.20029724067541677, "grad_norm": 2.08127498626709, "learning_rate": 0.0001851722206993376, "loss": 1.5465, "step": 5593 }, { "epoch": 0.20033305280498506, "grad_norm": 2.2466256618499756, "learning_rate": 0.00018516614231798423, "loss": 1.8191, "step": 5594 }, { "epoch": 0.20036886493455333, "grad_norm": 1.2826673984527588, "learning_rate": 0.00018516006279083026, "loss": 1.7799, "step": 5595 }, { "epoch": 0.20040467706412163, "grad_norm": 1.394559621810913, "learning_rate": 0.0001851539821179575, "loss": 1.4522, "step": 5596 }, { "epoch": 0.2004404891936899, "grad_norm": 1.4629602432250977, "learning_rate": 0.00018514790029944777, "loss": 1.7201, "step": 5597 }, { "epoch": 0.2004763013232582, "grad_norm": 1.7223575115203857, "learning_rate": 0.00018514181733538285, "loss": 1.4502, "step": 5598 }, { "epoch": 0.20051211345282646, "grad_norm": 1.4611260890960693, "learning_rate": 0.00018513573322584463, "loss": 1.6936, "step": 5599 }, { "epoch": 0.20054792558239476, "grad_norm": 1.4663982391357422, "learning_rate": 0.0001851296479709149, "loss": 1.7965, "step": 5600 }, { "epoch": 0.20058373771196303, "grad_norm": 1.651157259941101, "learning_rate": 0.00018512356157067558, "loss": 1.5611, "step": 5601 }, { "epoch": 0.20061954984153132, "grad_norm": 1.538913369178772, "learning_rate": 0.00018511747402520857, "loss": 1.1651, "step": 5602 }, { "epoch": 0.20065536197109962, "grad_norm": 1.3776566982269287, "learning_rate": 0.0001851113853345957, "loss": 1.676, "step": 5603 }, { "epoch": 0.2006911741006679, "grad_norm": 1.6608750820159912, "learning_rate": 0.00018510529549891895, "loss": 1.6722, "step": 5604 }, { "epoch": 0.2007269862302362, "grad_norm": 2.144692897796631, "learning_rate": 0.00018509920451826022, "loss": 1.2254, "step": 5605 }, { "epoch": 0.20076279835980446, "grad_norm": 1.9433279037475586, "learning_rate": 0.00018509311239270145, "loss": 1.4043, "step": 5606 }, { "epoch": 0.20079861048937275, "grad_norm": 1.8131729364395142, "learning_rate": 0.00018508701912232464, "loss": 1.4766, "step": 5607 }, { "epoch": 0.20083442261894102, "grad_norm": 1.691941261291504, "learning_rate": 0.00018508092470721175, "loss": 1.3085, "step": 5608 }, { "epoch": 0.20087023474850932, "grad_norm": 1.7415412664413452, "learning_rate": 0.0001850748291474447, "loss": 1.6581, "step": 5609 }, { "epoch": 0.20090604687807762, "grad_norm": 1.5415544509887695, "learning_rate": 0.00018506873244310563, "loss": 1.3757, "step": 5610 }, { "epoch": 0.20094185900764588, "grad_norm": 1.8270162343978882, "learning_rate": 0.00018506263459427648, "loss": 1.4954, "step": 5611 }, { "epoch": 0.20097767113721418, "grad_norm": 1.324442982673645, "learning_rate": 0.00018505653560103928, "loss": 1.5829, "step": 5612 }, { "epoch": 0.20101348326678245, "grad_norm": 1.567414402961731, "learning_rate": 0.00018505043546347612, "loss": 1.3768, "step": 5613 }, { "epoch": 0.20104929539635075, "grad_norm": 1.504832148551941, "learning_rate": 0.00018504433418166908, "loss": 1.4962, "step": 5614 }, { "epoch": 0.20108510752591902, "grad_norm": 1.508852481842041, "learning_rate": 0.00018503823175570021, "loss": 1.6095, "step": 5615 }, { "epoch": 0.2011209196554873, "grad_norm": 1.5696232318878174, "learning_rate": 0.00018503212818565161, "loss": 1.3963, "step": 5616 }, { "epoch": 0.2011567317850556, "grad_norm": 1.629343867301941, "learning_rate": 0.00018502602347160544, "loss": 1.4996, "step": 5617 }, { "epoch": 0.20119254391462388, "grad_norm": 1.793253779411316, "learning_rate": 0.00018501991761364376, "loss": 1.4764, "step": 5618 }, { "epoch": 0.20122835604419217, "grad_norm": 1.5109243392944336, "learning_rate": 0.00018501381061184876, "loss": 1.71, "step": 5619 }, { "epoch": 0.20126416817376044, "grad_norm": 1.6845890283584595, "learning_rate": 0.0001850077024663026, "loss": 1.411, "step": 5620 }, { "epoch": 0.20129998030332874, "grad_norm": 1.3381202220916748, "learning_rate": 0.00018500159317708749, "loss": 1.9312, "step": 5621 }, { "epoch": 0.201335792432897, "grad_norm": 1.788730502128601, "learning_rate": 0.00018499548274428557, "loss": 1.4369, "step": 5622 }, { "epoch": 0.2013716045624653, "grad_norm": 1.5795271396636963, "learning_rate": 0.00018498937116797904, "loss": 1.4818, "step": 5623 }, { "epoch": 0.2014074166920336, "grad_norm": 1.4016799926757812, "learning_rate": 0.0001849832584482502, "loss": 1.6474, "step": 5624 }, { "epoch": 0.20144322882160187, "grad_norm": 1.7416629791259766, "learning_rate": 0.00018497714458518122, "loss": 1.6052, "step": 5625 }, { "epoch": 0.20147904095117017, "grad_norm": 1.3918477296829224, "learning_rate": 0.00018497102957885434, "loss": 1.349, "step": 5626 }, { "epoch": 0.20151485308073844, "grad_norm": 1.4549763202667236, "learning_rate": 0.0001849649134293519, "loss": 1.5576, "step": 5627 }, { "epoch": 0.20155066521030673, "grad_norm": 1.955651044845581, "learning_rate": 0.00018495879613675612, "loss": 1.4563, "step": 5628 }, { "epoch": 0.201586477339875, "grad_norm": 1.9450856447219849, "learning_rate": 0.00018495267770114935, "loss": 1.3856, "step": 5629 }, { "epoch": 0.2016222894694433, "grad_norm": 1.2987256050109863, "learning_rate": 0.00018494655812261387, "loss": 1.5538, "step": 5630 }, { "epoch": 0.2016581015990116, "grad_norm": 1.7535088062286377, "learning_rate": 0.00018494043740123202, "loss": 1.7828, "step": 5631 }, { "epoch": 0.20169391372857987, "grad_norm": 1.5175572633743286, "learning_rate": 0.00018493431553708614, "loss": 1.6587, "step": 5632 }, { "epoch": 0.20172972585814816, "grad_norm": 1.82554292678833, "learning_rate": 0.0001849281925302586, "loss": 1.6141, "step": 5633 }, { "epoch": 0.20176553798771643, "grad_norm": 1.5592626333236694, "learning_rate": 0.0001849220683808318, "loss": 1.5767, "step": 5634 }, { "epoch": 0.20180135011728473, "grad_norm": 1.3777804374694824, "learning_rate": 0.00018491594308888814, "loss": 1.4966, "step": 5635 }, { "epoch": 0.201837162246853, "grad_norm": 1.6428275108337402, "learning_rate": 0.00018490981665450994, "loss": 1.6563, "step": 5636 }, { "epoch": 0.2018729743764213, "grad_norm": 1.416022777557373, "learning_rate": 0.00018490368907777974, "loss": 1.8694, "step": 5637 }, { "epoch": 0.2019087865059896, "grad_norm": 1.6186103820800781, "learning_rate": 0.0001848975603587799, "loss": 1.5395, "step": 5638 }, { "epoch": 0.20194459863555786, "grad_norm": 1.5777395963668823, "learning_rate": 0.00018489143049759286, "loss": 1.6282, "step": 5639 }, { "epoch": 0.20198041076512616, "grad_norm": 1.8486963510513306, "learning_rate": 0.00018488529949430116, "loss": 1.5511, "step": 5640 }, { "epoch": 0.20201622289469442, "grad_norm": 2.059964418411255, "learning_rate": 0.00018487916734898722, "loss": 1.5742, "step": 5641 }, { "epoch": 0.20205203502426272, "grad_norm": 2.6553430557250977, "learning_rate": 0.0001848730340617336, "loss": 1.5722, "step": 5642 }, { "epoch": 0.202087847153831, "grad_norm": 1.6141916513442993, "learning_rate": 0.00018486689963262277, "loss": 1.6907, "step": 5643 }, { "epoch": 0.2021236592833993, "grad_norm": 1.7546532154083252, "learning_rate": 0.00018486076406173726, "loss": 1.4523, "step": 5644 }, { "epoch": 0.20215947141296758, "grad_norm": 1.4749776124954224, "learning_rate": 0.00018485462734915966, "loss": 1.5986, "step": 5645 }, { "epoch": 0.20219528354253585, "grad_norm": 1.5348141193389893, "learning_rate": 0.0001848484894949725, "loss": 1.7698, "step": 5646 }, { "epoch": 0.20223109567210415, "grad_norm": 1.4728552103042603, "learning_rate": 0.00018484235049925836, "loss": 1.6859, "step": 5647 }, { "epoch": 0.20226690780167242, "grad_norm": 2.210272789001465, "learning_rate": 0.00018483621036209983, "loss": 1.907, "step": 5648 }, { "epoch": 0.20230271993124072, "grad_norm": 1.673674464225769, "learning_rate": 0.0001848300690835795, "loss": 1.665, "step": 5649 }, { "epoch": 0.20233853206080898, "grad_norm": 1.225216269493103, "learning_rate": 0.00018482392666378003, "loss": 1.5294, "step": 5650 }, { "epoch": 0.20237434419037728, "grad_norm": 1.159347414970398, "learning_rate": 0.00018481778310278405, "loss": 1.4917, "step": 5651 }, { "epoch": 0.20241015631994558, "grad_norm": 1.6550319194793701, "learning_rate": 0.0001848116384006742, "loss": 1.6163, "step": 5652 }, { "epoch": 0.20244596844951385, "grad_norm": 1.5937591791152954, "learning_rate": 0.00018480549255753313, "loss": 1.4051, "step": 5653 }, { "epoch": 0.20248178057908214, "grad_norm": 1.5911157131195068, "learning_rate": 0.0001847993455734436, "loss": 1.6417, "step": 5654 }, { "epoch": 0.2025175927086504, "grad_norm": 1.6294200420379639, "learning_rate": 0.00018479319744848821, "loss": 1.7678, "step": 5655 }, { "epoch": 0.2025534048382187, "grad_norm": 2.7911853790283203, "learning_rate": 0.00018478704818274976, "loss": 1.693, "step": 5656 }, { "epoch": 0.20258921696778698, "grad_norm": 1.8016701936721802, "learning_rate": 0.00018478089777631092, "loss": 1.608, "step": 5657 }, { "epoch": 0.20262502909735527, "grad_norm": 1.3736586570739746, "learning_rate": 0.00018477474622925449, "loss": 1.5401, "step": 5658 }, { "epoch": 0.20266084122692357, "grad_norm": 1.949476718902588, "learning_rate": 0.00018476859354166317, "loss": 1.7005, "step": 5659 }, { "epoch": 0.20269665335649184, "grad_norm": 1.9096870422363281, "learning_rate": 0.0001847624397136198, "loss": 1.6647, "step": 5660 }, { "epoch": 0.20273246548606014, "grad_norm": 1.571459412574768, "learning_rate": 0.0001847562847452071, "loss": 1.6983, "step": 5661 }, { "epoch": 0.2027682776156284, "grad_norm": 1.6884174346923828, "learning_rate": 0.0001847501286365079, "loss": 1.4849, "step": 5662 }, { "epoch": 0.2028040897451967, "grad_norm": 1.2368390560150146, "learning_rate": 0.00018474397138760508, "loss": 1.5239, "step": 5663 }, { "epoch": 0.20283990187476497, "grad_norm": 2.1719744205474854, "learning_rate": 0.00018473781299858146, "loss": 1.7039, "step": 5664 }, { "epoch": 0.20287571400433327, "grad_norm": 1.8494501113891602, "learning_rate": 0.0001847316534695198, "loss": 1.8157, "step": 5665 }, { "epoch": 0.20291152613390157, "grad_norm": 1.4276621341705322, "learning_rate": 0.0001847254928005031, "loss": 1.8173, "step": 5666 }, { "epoch": 0.20294733826346983, "grad_norm": 1.520430088043213, "learning_rate": 0.00018471933099161415, "loss": 1.6106, "step": 5667 }, { "epoch": 0.20298315039303813, "grad_norm": 1.8959459066390991, "learning_rate": 0.00018471316804293594, "loss": 1.7846, "step": 5668 }, { "epoch": 0.2030189625226064, "grad_norm": 1.4433817863464355, "learning_rate": 0.00018470700395455125, "loss": 1.9018, "step": 5669 }, { "epoch": 0.2030547746521747, "grad_norm": 1.5954314470291138, "learning_rate": 0.00018470083872654312, "loss": 1.2725, "step": 5670 }, { "epoch": 0.20309058678174297, "grad_norm": 1.5294626951217651, "learning_rate": 0.00018469467235899444, "loss": 1.5162, "step": 5671 }, { "epoch": 0.20312639891131126, "grad_norm": 1.519560694694519, "learning_rate": 0.00018468850485198822, "loss": 1.5687, "step": 5672 }, { "epoch": 0.20316221104087956, "grad_norm": 2.257767915725708, "learning_rate": 0.00018468233620560739, "loss": 1.3524, "step": 5673 }, { "epoch": 0.20319802317044783, "grad_norm": 1.5276037454605103, "learning_rate": 0.00018467616641993498, "loss": 1.6337, "step": 5674 }, { "epoch": 0.20323383530001612, "grad_norm": 1.8103950023651123, "learning_rate": 0.00018466999549505392, "loss": 1.4483, "step": 5675 }, { "epoch": 0.2032696474295844, "grad_norm": 1.5021846294403076, "learning_rate": 0.00018466382343104734, "loss": 1.6137, "step": 5676 }, { "epoch": 0.2033054595591527, "grad_norm": 1.6001121997833252, "learning_rate": 0.00018465765022799823, "loss": 1.3985, "step": 5677 }, { "epoch": 0.20334127168872096, "grad_norm": 1.44736647605896, "learning_rate": 0.00018465147588598958, "loss": 1.2853, "step": 5678 }, { "epoch": 0.20337708381828926, "grad_norm": 1.746927261352539, "learning_rate": 0.00018464530040510456, "loss": 1.6917, "step": 5679 }, { "epoch": 0.20341289594785755, "grad_norm": 1.9499015808105469, "learning_rate": 0.0001846391237854262, "loss": 1.7338, "step": 5680 }, { "epoch": 0.20344870807742582, "grad_norm": 1.527923583984375, "learning_rate": 0.0001846329460270376, "loss": 1.5749, "step": 5681 }, { "epoch": 0.20348452020699412, "grad_norm": 1.8569077253341675, "learning_rate": 0.0001846267671300219, "loss": 1.7941, "step": 5682 }, { "epoch": 0.2035203323365624, "grad_norm": 1.7201156616210938, "learning_rate": 0.00018462058709446216, "loss": 1.7085, "step": 5683 }, { "epoch": 0.20355614446613068, "grad_norm": 3.1380867958068848, "learning_rate": 0.00018461440592044165, "loss": 1.887, "step": 5684 }, { "epoch": 0.20359195659569895, "grad_norm": 1.3634907007217407, "learning_rate": 0.00018460822360804338, "loss": 1.6883, "step": 5685 }, { "epoch": 0.20362776872526725, "grad_norm": 1.4703376293182373, "learning_rate": 0.00018460204015735064, "loss": 1.5107, "step": 5686 }, { "epoch": 0.20366358085483555, "grad_norm": 1.7667032480239868, "learning_rate": 0.00018459585556844656, "loss": 1.829, "step": 5687 }, { "epoch": 0.20369939298440382, "grad_norm": 2.221970796585083, "learning_rate": 0.00018458966984141438, "loss": 1.3804, "step": 5688 }, { "epoch": 0.2037352051139721, "grad_norm": 2.146343231201172, "learning_rate": 0.00018458348297633727, "loss": 1.6312, "step": 5689 }, { "epoch": 0.20377101724354038, "grad_norm": 1.7487691640853882, "learning_rate": 0.00018457729497329853, "loss": 1.38, "step": 5690 }, { "epoch": 0.20380682937310868, "grad_norm": 1.7649887800216675, "learning_rate": 0.0001845711058323814, "loss": 1.6627, "step": 5691 }, { "epoch": 0.20384264150267695, "grad_norm": 1.3033711910247803, "learning_rate": 0.0001845649155536691, "loss": 1.6029, "step": 5692 }, { "epoch": 0.20387845363224524, "grad_norm": 1.5455960035324097, "learning_rate": 0.00018455872413724496, "loss": 1.573, "step": 5693 }, { "epoch": 0.2039142657618135, "grad_norm": 1.7799946069717407, "learning_rate": 0.00018455253158319225, "loss": 1.4815, "step": 5694 }, { "epoch": 0.2039500778913818, "grad_norm": 1.9104725122451782, "learning_rate": 0.00018454633789159427, "loss": 1.684, "step": 5695 }, { "epoch": 0.2039858900209501, "grad_norm": 1.4825611114501953, "learning_rate": 0.0001845401430625344, "loss": 1.5976, "step": 5696 }, { "epoch": 0.20402170215051837, "grad_norm": 1.7644739151000977, "learning_rate": 0.00018453394709609598, "loss": 2.0426, "step": 5697 }, { "epoch": 0.20405751428008667, "grad_norm": 1.5432175397872925, "learning_rate": 0.0001845277499923623, "loss": 1.8283, "step": 5698 }, { "epoch": 0.20409332640965494, "grad_norm": 2.520534038543701, "learning_rate": 0.0001845215517514168, "loss": 1.7116, "step": 5699 }, { "epoch": 0.20412913853922324, "grad_norm": 1.333713173866272, "learning_rate": 0.0001845153523733428, "loss": 1.5942, "step": 5700 }, { "epoch": 0.2041649506687915, "grad_norm": 1.654269814491272, "learning_rate": 0.00018450915185822382, "loss": 1.8312, "step": 5701 }, { "epoch": 0.2042007627983598, "grad_norm": 1.8361196517944336, "learning_rate": 0.00018450295020614317, "loss": 1.592, "step": 5702 }, { "epoch": 0.2042365749279281, "grad_norm": 1.52461576461792, "learning_rate": 0.00018449674741718433, "loss": 1.8999, "step": 5703 }, { "epoch": 0.20427238705749637, "grad_norm": 1.7082089185714722, "learning_rate": 0.00018449054349143072, "loss": 1.4507, "step": 5704 }, { "epoch": 0.20430819918706467, "grad_norm": 1.8044872283935547, "learning_rate": 0.0001844843384289659, "loss": 1.9495, "step": 5705 }, { "epoch": 0.20434401131663293, "grad_norm": 2.0654454231262207, "learning_rate": 0.00018447813222987323, "loss": 2.0662, "step": 5706 }, { "epoch": 0.20437982344620123, "grad_norm": 2.090245008468628, "learning_rate": 0.00018447192489423625, "loss": 1.7459, "step": 5707 }, { "epoch": 0.2044156355757695, "grad_norm": 1.994872808456421, "learning_rate": 0.00018446571642213852, "loss": 2.0094, "step": 5708 }, { "epoch": 0.2044514477053378, "grad_norm": 1.3152283430099487, "learning_rate": 0.0001844595068136635, "loss": 1.6219, "step": 5709 }, { "epoch": 0.2044872598349061, "grad_norm": 1.7344331741333008, "learning_rate": 0.0001844532960688948, "loss": 1.5429, "step": 5710 }, { "epoch": 0.20452307196447436, "grad_norm": 1.7377667427062988, "learning_rate": 0.0001844470841879159, "loss": 1.6329, "step": 5711 }, { "epoch": 0.20455888409404266, "grad_norm": 1.2515347003936768, "learning_rate": 0.00018444087117081042, "loss": 1.5964, "step": 5712 }, { "epoch": 0.20459469622361093, "grad_norm": 1.538106918334961, "learning_rate": 0.00018443465701766196, "loss": 1.323, "step": 5713 }, { "epoch": 0.20463050835317922, "grad_norm": 1.9756258726119995, "learning_rate": 0.0001844284417285541, "loss": 1.729, "step": 5714 }, { "epoch": 0.2046663204827475, "grad_norm": 1.4631593227386475, "learning_rate": 0.00018442222530357043, "loss": 1.7244, "step": 5715 }, { "epoch": 0.2047021326123158, "grad_norm": 1.497786045074463, "learning_rate": 0.00018441600774279465, "loss": 1.6359, "step": 5716 }, { "epoch": 0.2047379447418841, "grad_norm": 1.5431510210037231, "learning_rate": 0.00018440978904631032, "loss": 1.3245, "step": 5717 }, { "epoch": 0.20477375687145236, "grad_norm": 1.3295104503631592, "learning_rate": 0.00018440356921420122, "loss": 1.5629, "step": 5718 }, { "epoch": 0.20480956900102065, "grad_norm": 1.2782552242279053, "learning_rate": 0.00018439734824655092, "loss": 1.3756, "step": 5719 }, { "epoch": 0.20484538113058892, "grad_norm": 1.6534874439239502, "learning_rate": 0.00018439112614344322, "loss": 1.3338, "step": 5720 }, { "epoch": 0.20488119326015722, "grad_norm": 1.5594265460968018, "learning_rate": 0.0001843849029049617, "loss": 1.5184, "step": 5721 }, { "epoch": 0.2049170053897255, "grad_norm": 1.6822896003723145, "learning_rate": 0.00018437867853119023, "loss": 1.4627, "step": 5722 }, { "epoch": 0.20495281751929378, "grad_norm": 2.03576397895813, "learning_rate": 0.00018437245302221244, "loss": 1.646, "step": 5723 }, { "epoch": 0.20498862964886208, "grad_norm": 1.5040608644485474, "learning_rate": 0.00018436622637811215, "loss": 1.596, "step": 5724 }, { "epoch": 0.20502444177843035, "grad_norm": 1.8175115585327148, "learning_rate": 0.0001843599985989731, "loss": 1.4633, "step": 5725 }, { "epoch": 0.20506025390799865, "grad_norm": 2.4232726097106934, "learning_rate": 0.0001843537696848791, "loss": 1.5774, "step": 5726 }, { "epoch": 0.20509606603756692, "grad_norm": 1.5448403358459473, "learning_rate": 0.0001843475396359139, "loss": 1.234, "step": 5727 }, { "epoch": 0.2051318781671352, "grad_norm": 1.332551121711731, "learning_rate": 0.00018434130845216138, "loss": 1.4809, "step": 5728 }, { "epoch": 0.20516769029670348, "grad_norm": 1.351884365081787, "learning_rate": 0.00018433507613370534, "loss": 1.569, "step": 5729 }, { "epoch": 0.20520350242627178, "grad_norm": 1.5869837999343872, "learning_rate": 0.00018432884268062964, "loss": 1.7137, "step": 5730 }, { "epoch": 0.20523931455584007, "grad_norm": 1.690000295639038, "learning_rate": 0.00018432260809301816, "loss": 1.5449, "step": 5731 }, { "epoch": 0.20527512668540834, "grad_norm": 1.3282488584518433, "learning_rate": 0.00018431637237095472, "loss": 1.3576, "step": 5732 }, { "epoch": 0.20531093881497664, "grad_norm": 2.1671175956726074, "learning_rate": 0.00018431013551452327, "loss": 1.5015, "step": 5733 }, { "epoch": 0.2053467509445449, "grad_norm": 1.4850959777832031, "learning_rate": 0.0001843038975238077, "loss": 1.2529, "step": 5734 }, { "epoch": 0.2053825630741132, "grad_norm": 1.4230232238769531, "learning_rate": 0.00018429765839889193, "loss": 1.3751, "step": 5735 }, { "epoch": 0.20541837520368147, "grad_norm": 1.605311632156372, "learning_rate": 0.0001842914181398599, "loss": 1.6034, "step": 5736 }, { "epoch": 0.20545418733324977, "grad_norm": 1.7204928398132324, "learning_rate": 0.00018428517674679557, "loss": 1.3672, "step": 5737 }, { "epoch": 0.20548999946281807, "grad_norm": 1.6784799098968506, "learning_rate": 0.0001842789342197829, "loss": 1.5203, "step": 5738 }, { "epoch": 0.20552581159238634, "grad_norm": 1.939692497253418, "learning_rate": 0.00018427269055890588, "loss": 1.9134, "step": 5739 }, { "epoch": 0.20556162372195463, "grad_norm": 1.4052371978759766, "learning_rate": 0.00018426644576424855, "loss": 1.5995, "step": 5740 }, { "epoch": 0.2055974358515229, "grad_norm": 1.1106784343719482, "learning_rate": 0.00018426019983589482, "loss": 1.5371, "step": 5741 }, { "epoch": 0.2056332479810912, "grad_norm": 1.7211228609085083, "learning_rate": 0.00018425395277392882, "loss": 1.6038, "step": 5742 }, { "epoch": 0.20566906011065947, "grad_norm": 1.6395955085754395, "learning_rate": 0.0001842477045784346, "loss": 1.4582, "step": 5743 }, { "epoch": 0.20570487224022777, "grad_norm": 1.9705967903137207, "learning_rate": 0.00018424145524949614, "loss": 1.7143, "step": 5744 }, { "epoch": 0.20574068436979606, "grad_norm": 3.6009767055511475, "learning_rate": 0.00018423520478719758, "loss": 1.5917, "step": 5745 }, { "epoch": 0.20577649649936433, "grad_norm": 1.7975378036499023, "learning_rate": 0.00018422895319162298, "loss": 1.4714, "step": 5746 }, { "epoch": 0.20581230862893263, "grad_norm": 2.167494535446167, "learning_rate": 0.0001842227004628565, "loss": 1.4169, "step": 5747 }, { "epoch": 0.2058481207585009, "grad_norm": 2.0247933864593506, "learning_rate": 0.00018421644660098217, "loss": 1.6403, "step": 5748 }, { "epoch": 0.2058839328880692, "grad_norm": 1.7096855640411377, "learning_rate": 0.00018421019160608424, "loss": 1.5459, "step": 5749 }, { "epoch": 0.20591974501763746, "grad_norm": 2.594543933868408, "learning_rate": 0.00018420393547824676, "loss": 1.4367, "step": 5750 }, { "epoch": 0.20595555714720576, "grad_norm": 2.801156520843506, "learning_rate": 0.000184197678217554, "loss": 1.5627, "step": 5751 }, { "epoch": 0.20599136927677406, "grad_norm": 1.9084726572036743, "learning_rate": 0.00018419141982409001, "loss": 1.3815, "step": 5752 }, { "epoch": 0.20602718140634232, "grad_norm": 1.531064748764038, "learning_rate": 0.00018418516029793916, "loss": 1.639, "step": 5753 }, { "epoch": 0.20606299353591062, "grad_norm": 1.4335132837295532, "learning_rate": 0.00018417889963918548, "loss": 1.849, "step": 5754 }, { "epoch": 0.2060988056654789, "grad_norm": 1.271621584892273, "learning_rate": 0.00018417263784791335, "loss": 1.7477, "step": 5755 }, { "epoch": 0.2061346177950472, "grad_norm": 1.3262522220611572, "learning_rate": 0.0001841663749242069, "loss": 1.7865, "step": 5756 }, { "epoch": 0.20617042992461546, "grad_norm": 1.6832629442214966, "learning_rate": 0.0001841601108681505, "loss": 1.3944, "step": 5757 }, { "epoch": 0.20620624205418375, "grad_norm": 1.295717477798462, "learning_rate": 0.00018415384567982833, "loss": 1.6328, "step": 5758 }, { "epoch": 0.20624205418375205, "grad_norm": 1.605613350868225, "learning_rate": 0.0001841475793593247, "loss": 1.4366, "step": 5759 }, { "epoch": 0.20627786631332032, "grad_norm": 1.8435920476913452, "learning_rate": 0.00018414131190672394, "loss": 1.5914, "step": 5760 }, { "epoch": 0.20631367844288862, "grad_norm": 1.62568199634552, "learning_rate": 0.00018413504332211037, "loss": 1.3896, "step": 5761 }, { "epoch": 0.20634949057245688, "grad_norm": 1.6363476514816284, "learning_rate": 0.00018412877360556834, "loss": 1.624, "step": 5762 }, { "epoch": 0.20638530270202518, "grad_norm": 1.4820305109024048, "learning_rate": 0.00018412250275718218, "loss": 1.5039, "step": 5763 }, { "epoch": 0.20642111483159345, "grad_norm": 1.8757277727127075, "learning_rate": 0.00018411623077703624, "loss": 1.7367, "step": 5764 }, { "epoch": 0.20645692696116175, "grad_norm": 2.476335287094116, "learning_rate": 0.0001841099576652149, "loss": 1.8845, "step": 5765 }, { "epoch": 0.20649273909073004, "grad_norm": 1.4801583290100098, "learning_rate": 0.00018410368342180263, "loss": 1.4245, "step": 5766 }, { "epoch": 0.2065285512202983, "grad_norm": 1.6733896732330322, "learning_rate": 0.00018409740804688373, "loss": 1.3733, "step": 5767 }, { "epoch": 0.2065643633498666, "grad_norm": 1.4926596879959106, "learning_rate": 0.0001840911315405427, "loss": 1.4559, "step": 5768 }, { "epoch": 0.20660017547943488, "grad_norm": 1.447648048400879, "learning_rate": 0.00018408485390286397, "loss": 1.5615, "step": 5769 }, { "epoch": 0.20663598760900317, "grad_norm": 1.9112826585769653, "learning_rate": 0.00018407857513393197, "loss": 1.665, "step": 5770 }, { "epoch": 0.20667179973857144, "grad_norm": 2.436891794204712, "learning_rate": 0.00018407229523383122, "loss": 1.681, "step": 5771 }, { "epoch": 0.20670761186813974, "grad_norm": 1.7354581356048584, "learning_rate": 0.00018406601420264618, "loss": 1.4212, "step": 5772 }, { "epoch": 0.20674342399770804, "grad_norm": 1.4902445077896118, "learning_rate": 0.00018405973204046135, "loss": 1.6882, "step": 5773 }, { "epoch": 0.2067792361272763, "grad_norm": 1.775509238243103, "learning_rate": 0.00018405344874736126, "loss": 1.3206, "step": 5774 }, { "epoch": 0.2068150482568446, "grad_norm": 1.1771329641342163, "learning_rate": 0.00018404716432343044, "loss": 1.4992, "step": 5775 }, { "epoch": 0.20685086038641287, "grad_norm": 1.741278052330017, "learning_rate": 0.0001840408787687534, "loss": 1.2352, "step": 5776 }, { "epoch": 0.20688667251598117, "grad_norm": 2.0680439472198486, "learning_rate": 0.0001840345920834148, "loss": 1.5474, "step": 5777 }, { "epoch": 0.20692248464554944, "grad_norm": 1.8485260009765625, "learning_rate": 0.00018402830426749914, "loss": 1.3399, "step": 5778 }, { "epoch": 0.20695829677511773, "grad_norm": 1.583206057548523, "learning_rate": 0.00018402201532109102, "loss": 1.6553, "step": 5779 }, { "epoch": 0.20699410890468603, "grad_norm": 1.9147899150848389, "learning_rate": 0.00018401572524427505, "loss": 1.9019, "step": 5780 }, { "epoch": 0.2070299210342543, "grad_norm": 1.735785961151123, "learning_rate": 0.0001840094340371359, "loss": 1.778, "step": 5781 }, { "epoch": 0.2070657331638226, "grad_norm": 1.9432623386383057, "learning_rate": 0.00018400314169975818, "loss": 1.5151, "step": 5782 }, { "epoch": 0.20710154529339087, "grad_norm": 1.6666193008422852, "learning_rate": 0.00018399684823222653, "loss": 1.2603, "step": 5783 }, { "epoch": 0.20713735742295916, "grad_norm": 1.642317771911621, "learning_rate": 0.00018399055363462562, "loss": 1.4213, "step": 5784 }, { "epoch": 0.20717316955252743, "grad_norm": 2.0764431953430176, "learning_rate": 0.0001839842579070402, "loss": 1.278, "step": 5785 }, { "epoch": 0.20720898168209573, "grad_norm": 1.3511927127838135, "learning_rate": 0.0001839779610495549, "loss": 1.4598, "step": 5786 }, { "epoch": 0.20724479381166402, "grad_norm": 1.897929072380066, "learning_rate": 0.00018397166306225444, "loss": 1.6758, "step": 5787 }, { "epoch": 0.2072806059412323, "grad_norm": 1.5352063179016113, "learning_rate": 0.00018396536394522359, "loss": 1.655, "step": 5788 }, { "epoch": 0.2073164180708006, "grad_norm": 1.7674046754837036, "learning_rate": 0.00018395906369854704, "loss": 1.4136, "step": 5789 }, { "epoch": 0.20735223020036886, "grad_norm": 1.2308050394058228, "learning_rate": 0.00018395276232230964, "loss": 1.4874, "step": 5790 }, { "epoch": 0.20738804232993716, "grad_norm": 1.8490521907806396, "learning_rate": 0.00018394645981659608, "loss": 1.4895, "step": 5791 }, { "epoch": 0.20742385445950542, "grad_norm": 1.8193918466567993, "learning_rate": 0.00018394015618149122, "loss": 1.3781, "step": 5792 }, { "epoch": 0.20745966658907372, "grad_norm": 1.4143835306167603, "learning_rate": 0.00018393385141707977, "loss": 1.5742, "step": 5793 }, { "epoch": 0.207495478718642, "grad_norm": 1.3196909427642822, "learning_rate": 0.00018392754552344666, "loss": 1.3337, "step": 5794 }, { "epoch": 0.2075312908482103, "grad_norm": 1.3538028001785278, "learning_rate": 0.00018392123850067668, "loss": 1.5709, "step": 5795 }, { "epoch": 0.20756710297777858, "grad_norm": 1.751012921333313, "learning_rate": 0.00018391493034885468, "loss": 1.4149, "step": 5796 }, { "epoch": 0.20760291510734685, "grad_norm": 1.4756646156311035, "learning_rate": 0.00018390862106806554, "loss": 1.5291, "step": 5797 }, { "epoch": 0.20763872723691515, "grad_norm": 1.8159650564193726, "learning_rate": 0.00018390231065839414, "loss": 1.5351, "step": 5798 }, { "epoch": 0.20767453936648342, "grad_norm": 1.8627691268920898, "learning_rate": 0.00018389599911992538, "loss": 1.4553, "step": 5799 }, { "epoch": 0.20771035149605171, "grad_norm": 1.5494425296783447, "learning_rate": 0.00018388968645274416, "loss": 1.4181, "step": 5800 }, { "epoch": 0.20774616362561998, "grad_norm": 1.2609368562698364, "learning_rate": 0.00018388337265693542, "loss": 1.474, "step": 5801 }, { "epoch": 0.20778197575518828, "grad_norm": 1.9432854652404785, "learning_rate": 0.0001838770577325841, "loss": 1.5919, "step": 5802 }, { "epoch": 0.20781778788475658, "grad_norm": 1.5743569135665894, "learning_rate": 0.00018387074167977517, "loss": 1.4351, "step": 5803 }, { "epoch": 0.20785360001432485, "grad_norm": 1.721057653427124, "learning_rate": 0.00018386442449859358, "loss": 1.538, "step": 5804 }, { "epoch": 0.20788941214389314, "grad_norm": 2.5399715900421143, "learning_rate": 0.00018385810618912435, "loss": 1.5953, "step": 5805 }, { "epoch": 0.2079252242734614, "grad_norm": 1.6732019186019897, "learning_rate": 0.00018385178675145246, "loss": 1.371, "step": 5806 }, { "epoch": 0.2079610364030297, "grad_norm": 1.5420591831207275, "learning_rate": 0.00018384546618566296, "loss": 1.3819, "step": 5807 }, { "epoch": 0.20799684853259798, "grad_norm": 2.665620803833008, "learning_rate": 0.00018383914449184084, "loss": 1.7292, "step": 5808 }, { "epoch": 0.20803266066216627, "grad_norm": 1.8281484842300415, "learning_rate": 0.0001838328216700712, "loss": 1.3194, "step": 5809 }, { "epoch": 0.20806847279173457, "grad_norm": 3.399212598800659, "learning_rate": 0.00018382649772043908, "loss": 1.9008, "step": 5810 }, { "epoch": 0.20810428492130284, "grad_norm": 1.844014286994934, "learning_rate": 0.00018382017264302955, "loss": 1.7792, "step": 5811 }, { "epoch": 0.20814009705087114, "grad_norm": 1.7047966718673706, "learning_rate": 0.0001838138464379277, "loss": 1.3427, "step": 5812 }, { "epoch": 0.2081759091804394, "grad_norm": 1.635097861289978, "learning_rate": 0.0001838075191052187, "loss": 1.8786, "step": 5813 }, { "epoch": 0.2082117213100077, "grad_norm": 1.8636630773544312, "learning_rate": 0.0001838011906449876, "loss": 1.6942, "step": 5814 }, { "epoch": 0.20824753343957597, "grad_norm": 1.397093653678894, "learning_rate": 0.0001837948610573196, "loss": 1.4005, "step": 5815 }, { "epoch": 0.20828334556914427, "grad_norm": 1.8946696519851685, "learning_rate": 0.0001837885303422998, "loss": 1.748, "step": 5816 }, { "epoch": 0.20831915769871256, "grad_norm": 1.4679687023162842, "learning_rate": 0.00018378219850001345, "loss": 1.4857, "step": 5817 }, { "epoch": 0.20835496982828083, "grad_norm": 1.5090714693069458, "learning_rate": 0.00018377586553054565, "loss": 1.7174, "step": 5818 }, { "epoch": 0.20839078195784913, "grad_norm": 1.7879695892333984, "learning_rate": 0.00018376953143398167, "loss": 1.6207, "step": 5819 }, { "epoch": 0.2084265940874174, "grad_norm": 2.3632588386535645, "learning_rate": 0.00018376319621040668, "loss": 1.4637, "step": 5820 }, { "epoch": 0.2084624062169857, "grad_norm": 2.151449680328369, "learning_rate": 0.00018375685985990594, "loss": 2.0277, "step": 5821 }, { "epoch": 0.20849821834655397, "grad_norm": 1.7777212858200073, "learning_rate": 0.00018375052238256466, "loss": 1.7095, "step": 5822 }, { "epoch": 0.20853403047612226, "grad_norm": 2.0421993732452393, "learning_rate": 0.00018374418377846817, "loss": 1.8641, "step": 5823 }, { "epoch": 0.20856984260569056, "grad_norm": 1.6348986625671387, "learning_rate": 0.0001837378440477017, "loss": 1.6932, "step": 5824 }, { "epoch": 0.20860565473525883, "grad_norm": 1.481870174407959, "learning_rate": 0.00018373150319035055, "loss": 1.6623, "step": 5825 }, { "epoch": 0.20864146686482712, "grad_norm": 1.5054895877838135, "learning_rate": 0.00018372516120650003, "loss": 1.579, "step": 5826 }, { "epoch": 0.2086772789943954, "grad_norm": 1.6486055850982666, "learning_rate": 0.00018371881809623545, "loss": 1.4551, "step": 5827 }, { "epoch": 0.2087130911239637, "grad_norm": 1.4928501844406128, "learning_rate": 0.0001837124738596422, "loss": 1.8089, "step": 5828 }, { "epoch": 0.20874890325353196, "grad_norm": 1.265228509902954, "learning_rate": 0.00018370612849680557, "loss": 1.6432, "step": 5829 }, { "epoch": 0.20878471538310026, "grad_norm": 1.9759631156921387, "learning_rate": 0.00018369978200781094, "loss": 1.6254, "step": 5830 }, { "epoch": 0.20882052751266855, "grad_norm": 1.7568730115890503, "learning_rate": 0.00018369343439274372, "loss": 1.8442, "step": 5831 }, { "epoch": 0.20885633964223682, "grad_norm": 1.8063440322875977, "learning_rate": 0.0001836870856516893, "loss": 1.4209, "step": 5832 }, { "epoch": 0.20889215177180512, "grad_norm": 1.3789457082748413, "learning_rate": 0.0001836807357847331, "loss": 1.4017, "step": 5833 }, { "epoch": 0.2089279639013734, "grad_norm": 1.9040539264678955, "learning_rate": 0.00018367438479196055, "loss": 1.7246, "step": 5834 }, { "epoch": 0.20896377603094168, "grad_norm": 2.3700289726257324, "learning_rate": 0.00018366803267345704, "loss": 1.5969, "step": 5835 }, { "epoch": 0.20899958816050995, "grad_norm": 1.2375210523605347, "learning_rate": 0.0001836616794293081, "loss": 1.5293, "step": 5836 }, { "epoch": 0.20903540029007825, "grad_norm": 1.7296556234359741, "learning_rate": 0.00018365532505959918, "loss": 1.5033, "step": 5837 }, { "epoch": 0.20907121241964655, "grad_norm": 1.7073261737823486, "learning_rate": 0.00018364896956441577, "loss": 1.6766, "step": 5838 }, { "epoch": 0.20910702454921481, "grad_norm": 1.5448671579360962, "learning_rate": 0.00018364261294384336, "loss": 1.5826, "step": 5839 }, { "epoch": 0.2091428366787831, "grad_norm": 1.3705118894577026, "learning_rate": 0.0001836362551979675, "loss": 1.5987, "step": 5840 }, { "epoch": 0.20917864880835138, "grad_norm": 1.785382866859436, "learning_rate": 0.00018362989632687374, "loss": 1.7167, "step": 5841 }, { "epoch": 0.20921446093791968, "grad_norm": 1.6626938581466675, "learning_rate": 0.00018362353633064754, "loss": 1.5908, "step": 5842 }, { "epoch": 0.20925027306748795, "grad_norm": 1.831031084060669, "learning_rate": 0.00018361717520937458, "loss": 1.8772, "step": 5843 }, { "epoch": 0.20928608519705624, "grad_norm": 1.589511752128601, "learning_rate": 0.00018361081296314037, "loss": 1.815, "step": 5844 }, { "epoch": 0.20932189732662454, "grad_norm": 1.4295203685760498, "learning_rate": 0.0001836044495920305, "loss": 1.4799, "step": 5845 }, { "epoch": 0.2093577094561928, "grad_norm": 1.112631916999817, "learning_rate": 0.00018359808509613062, "loss": 1.3803, "step": 5846 }, { "epoch": 0.2093935215857611, "grad_norm": 1.5145599842071533, "learning_rate": 0.00018359171947552631, "loss": 1.3917, "step": 5847 }, { "epoch": 0.20942933371532937, "grad_norm": 2.381051778793335, "learning_rate": 0.00018358535273030327, "loss": 1.5099, "step": 5848 }, { "epoch": 0.20946514584489767, "grad_norm": 1.7032874822616577, "learning_rate": 0.0001835789848605471, "loss": 1.3714, "step": 5849 }, { "epoch": 0.20950095797446594, "grad_norm": 1.7090039253234863, "learning_rate": 0.00018357261586634353, "loss": 1.5838, "step": 5850 }, { "epoch": 0.20953677010403424, "grad_norm": 1.4482029676437378, "learning_rate": 0.00018356624574777822, "loss": 1.5396, "step": 5851 }, { "epoch": 0.20957258223360253, "grad_norm": 1.815578579902649, "learning_rate": 0.0001835598745049368, "loss": 1.5823, "step": 5852 }, { "epoch": 0.2096083943631708, "grad_norm": 1.842309594154358, "learning_rate": 0.00018355350213790513, "loss": 1.5635, "step": 5853 }, { "epoch": 0.2096442064927391, "grad_norm": 1.4999361038208008, "learning_rate": 0.00018354712864676885, "loss": 1.5074, "step": 5854 }, { "epoch": 0.20968001862230737, "grad_norm": 1.7008081674575806, "learning_rate": 0.00018354075403161367, "loss": 1.369, "step": 5855 }, { "epoch": 0.20971583075187566, "grad_norm": 2.609595537185669, "learning_rate": 0.00018353437829252543, "loss": 1.3887, "step": 5856 }, { "epoch": 0.20975164288144393, "grad_norm": 1.640960454940796, "learning_rate": 0.00018352800142958992, "loss": 1.4278, "step": 5857 }, { "epoch": 0.20978745501101223, "grad_norm": 1.3580068349838257, "learning_rate": 0.00018352162344289284, "loss": 1.4415, "step": 5858 }, { "epoch": 0.20982326714058053, "grad_norm": 1.7644946575164795, "learning_rate": 0.0001835152443325201, "loss": 1.592, "step": 5859 }, { "epoch": 0.2098590792701488, "grad_norm": 1.3494611978530884, "learning_rate": 0.00018350886409855744, "loss": 1.6329, "step": 5860 }, { "epoch": 0.2098948913997171, "grad_norm": 1.8251352310180664, "learning_rate": 0.00018350248274109077, "loss": 1.8776, "step": 5861 }, { "epoch": 0.20993070352928536, "grad_norm": 1.4748811721801758, "learning_rate": 0.00018349610026020585, "loss": 1.4748, "step": 5862 }, { "epoch": 0.20996651565885366, "grad_norm": 1.604453206062317, "learning_rate": 0.00018348971665598865, "loss": 1.2366, "step": 5863 }, { "epoch": 0.21000232778842193, "grad_norm": 1.736939787864685, "learning_rate": 0.000183483331928525, "loss": 1.5625, "step": 5864 }, { "epoch": 0.21003813991799022, "grad_norm": 1.6238269805908203, "learning_rate": 0.00018347694607790077, "loss": 1.5814, "step": 5865 }, { "epoch": 0.21007395204755852, "grad_norm": 1.4518622159957886, "learning_rate": 0.00018347055910420193, "loss": 1.6499, "step": 5866 }, { "epoch": 0.2101097641771268, "grad_norm": 2.65487003326416, "learning_rate": 0.0001834641710075144, "loss": 1.417, "step": 5867 }, { "epoch": 0.2101455763066951, "grad_norm": 2.1541175842285156, "learning_rate": 0.0001834577817879241, "loss": 1.1873, "step": 5868 }, { "epoch": 0.21018138843626336, "grad_norm": 1.5349715948104858, "learning_rate": 0.000183451391445517, "loss": 1.6339, "step": 5869 }, { "epoch": 0.21021720056583165, "grad_norm": 1.4636268615722656, "learning_rate": 0.00018344499998037907, "loss": 1.4769, "step": 5870 }, { "epoch": 0.21025301269539992, "grad_norm": 1.8863812685012817, "learning_rate": 0.0001834386073925963, "loss": 1.6514, "step": 5871 }, { "epoch": 0.21028882482496822, "grad_norm": 1.8188995122909546, "learning_rate": 0.0001834322136822547, "loss": 1.2138, "step": 5872 }, { "epoch": 0.21032463695453651, "grad_norm": 1.6070584058761597, "learning_rate": 0.00018342581884944027, "loss": 1.5714, "step": 5873 }, { "epoch": 0.21036044908410478, "grad_norm": 1.322187900543213, "learning_rate": 0.0001834194228942391, "loss": 1.4738, "step": 5874 }, { "epoch": 0.21039626121367308, "grad_norm": 1.4101799726486206, "learning_rate": 0.00018341302581673715, "loss": 1.256, "step": 5875 }, { "epoch": 0.21043207334324135, "grad_norm": 2.0768353939056396, "learning_rate": 0.00018340662761702055, "loss": 1.7106, "step": 5876 }, { "epoch": 0.21046788547280965, "grad_norm": 1.6466964483261108, "learning_rate": 0.00018340022829517537, "loss": 1.7091, "step": 5877 }, { "epoch": 0.21050369760237791, "grad_norm": 2.0485565662384033, "learning_rate": 0.00018339382785128767, "loss": 1.7706, "step": 5878 }, { "epoch": 0.2105395097319462, "grad_norm": 1.6183522939682007, "learning_rate": 0.00018338742628544363, "loss": 1.6943, "step": 5879 }, { "epoch": 0.2105753218615145, "grad_norm": 1.507336974143982, "learning_rate": 0.0001833810235977293, "loss": 1.6523, "step": 5880 }, { "epoch": 0.21061113399108278, "grad_norm": 1.335508942604065, "learning_rate": 0.00018337461978823084, "loss": 1.5629, "step": 5881 }, { "epoch": 0.21064694612065107, "grad_norm": 1.6823132038116455, "learning_rate": 0.00018336821485703445, "loss": 1.3857, "step": 5882 }, { "epoch": 0.21068275825021934, "grad_norm": 1.4094301462173462, "learning_rate": 0.00018336180880422625, "loss": 1.7403, "step": 5883 }, { "epoch": 0.21071857037978764, "grad_norm": 2.0397469997406006, "learning_rate": 0.00018335540162989244, "loss": 1.7507, "step": 5884 }, { "epoch": 0.2107543825093559, "grad_norm": 1.7900669574737549, "learning_rate": 0.00018334899333411926, "loss": 1.6038, "step": 5885 }, { "epoch": 0.2107901946389242, "grad_norm": 1.5094026327133179, "learning_rate": 0.00018334258391699285, "loss": 1.65, "step": 5886 }, { "epoch": 0.2108260067684925, "grad_norm": 1.6741819381713867, "learning_rate": 0.00018333617337859946, "loss": 1.6157, "step": 5887 }, { "epoch": 0.21086181889806077, "grad_norm": 1.4904005527496338, "learning_rate": 0.00018332976171902537, "loss": 1.6095, "step": 5888 }, { "epoch": 0.21089763102762907, "grad_norm": 1.66598379611969, "learning_rate": 0.00018332334893835683, "loss": 1.3666, "step": 5889 }, { "epoch": 0.21093344315719734, "grad_norm": 1.658950686454773, "learning_rate": 0.00018331693503668013, "loss": 1.6743, "step": 5890 }, { "epoch": 0.21096925528676563, "grad_norm": 1.507813811302185, "learning_rate": 0.00018331052001408152, "loss": 1.3427, "step": 5891 }, { "epoch": 0.2110050674163339, "grad_norm": 1.7073756456375122, "learning_rate": 0.0001833041038706473, "loss": 1.5811, "step": 5892 }, { "epoch": 0.2110408795459022, "grad_norm": 1.5660537481307983, "learning_rate": 0.00018329768660646384, "loss": 1.5551, "step": 5893 }, { "epoch": 0.21107669167547047, "grad_norm": 1.3659303188323975, "learning_rate": 0.00018329126822161747, "loss": 1.469, "step": 5894 }, { "epoch": 0.21111250380503876, "grad_norm": 1.2605630159378052, "learning_rate": 0.0001832848487161945, "loss": 1.5376, "step": 5895 }, { "epoch": 0.21114831593460706, "grad_norm": 1.5032516717910767, "learning_rate": 0.00018327842809028134, "loss": 1.4326, "step": 5896 }, { "epoch": 0.21118412806417533, "grad_norm": 1.8132591247558594, "learning_rate": 0.00018327200634396434, "loss": 1.614, "step": 5897 }, { "epoch": 0.21121994019374363, "grad_norm": 1.4367175102233887, "learning_rate": 0.0001832655834773299, "loss": 1.4983, "step": 5898 }, { "epoch": 0.2112557523233119, "grad_norm": 1.4380050897598267, "learning_rate": 0.00018325915949046444, "loss": 1.3839, "step": 5899 }, { "epoch": 0.2112915644528802, "grad_norm": 1.1733310222625732, "learning_rate": 0.00018325273438345437, "loss": 1.582, "step": 5900 }, { "epoch": 0.21132737658244846, "grad_norm": 1.7513288259506226, "learning_rate": 0.0001832463081563862, "loss": 1.5806, "step": 5901 }, { "epoch": 0.21136318871201676, "grad_norm": 1.4104002714157104, "learning_rate": 0.00018323988080934628, "loss": 1.6676, "step": 5902 }, { "epoch": 0.21139900084158506, "grad_norm": 1.9451051950454712, "learning_rate": 0.00018323345234242118, "loss": 1.5106, "step": 5903 }, { "epoch": 0.21143481297115332, "grad_norm": 1.9381672143936157, "learning_rate": 0.0001832270227556973, "loss": 1.0622, "step": 5904 }, { "epoch": 0.21147062510072162, "grad_norm": 1.0977705717086792, "learning_rate": 0.0001832205920492612, "loss": 1.4634, "step": 5905 }, { "epoch": 0.2115064372302899, "grad_norm": 2.0630342960357666, "learning_rate": 0.0001832141602231994, "loss": 1.5579, "step": 5906 }, { "epoch": 0.2115422493598582, "grad_norm": 1.915184497833252, "learning_rate": 0.0001832077272775984, "loss": 1.8376, "step": 5907 }, { "epoch": 0.21157806148942646, "grad_norm": 2.1756672859191895, "learning_rate": 0.0001832012932125448, "loss": 1.6073, "step": 5908 }, { "epoch": 0.21161387361899475, "grad_norm": 1.440247654914856, "learning_rate": 0.00018319485802812503, "loss": 1.5361, "step": 5909 }, { "epoch": 0.21164968574856305, "grad_norm": 1.6484793424606323, "learning_rate": 0.00018318842172442582, "loss": 1.7188, "step": 5910 }, { "epoch": 0.21168549787813132, "grad_norm": 2.225233793258667, "learning_rate": 0.0001831819843015337, "loss": 1.5521, "step": 5911 }, { "epoch": 0.21172131000769961, "grad_norm": 1.6229103803634644, "learning_rate": 0.00018317554575953527, "loss": 1.4822, "step": 5912 }, { "epoch": 0.21175712213726788, "grad_norm": 2.2891218662261963, "learning_rate": 0.00018316910609851713, "loss": 1.3342, "step": 5913 }, { "epoch": 0.21179293426683618, "grad_norm": 1.2021150588989258, "learning_rate": 0.00018316266531856598, "loss": 1.6548, "step": 5914 }, { "epoch": 0.21182874639640445, "grad_norm": 1.3572343587875366, "learning_rate": 0.00018315622341976844, "loss": 1.6756, "step": 5915 }, { "epoch": 0.21186455852597275, "grad_norm": 1.7767938375473022, "learning_rate": 0.0001831497804022112, "loss": 1.398, "step": 5916 }, { "epoch": 0.21190037065554104, "grad_norm": 2.0373289585113525, "learning_rate": 0.00018314333626598089, "loss": 1.4118, "step": 5917 }, { "epoch": 0.2119361827851093, "grad_norm": 1.7908626794815063, "learning_rate": 0.0001831368910111642, "loss": 1.4941, "step": 5918 }, { "epoch": 0.2119719949146776, "grad_norm": 1.8968188762664795, "learning_rate": 0.00018313044463784793, "loss": 1.4038, "step": 5919 }, { "epoch": 0.21200780704424588, "grad_norm": 1.641762137413025, "learning_rate": 0.00018312399714611876, "loss": 1.649, "step": 5920 }, { "epoch": 0.21204361917381417, "grad_norm": 1.3661926984786987, "learning_rate": 0.00018311754853606344, "loss": 1.7253, "step": 5921 }, { "epoch": 0.21207943130338244, "grad_norm": 1.5163073539733887, "learning_rate": 0.00018311109880776868, "loss": 1.6616, "step": 5922 }, { "epoch": 0.21211524343295074, "grad_norm": 1.510380506515503, "learning_rate": 0.00018310464796132133, "loss": 1.5898, "step": 5923 }, { "epoch": 0.21215105556251904, "grad_norm": 1.931257963180542, "learning_rate": 0.0001830981959968081, "loss": 1.5899, "step": 5924 }, { "epoch": 0.2121868676920873, "grad_norm": 1.626753330230713, "learning_rate": 0.00018309174291431587, "loss": 1.5853, "step": 5925 }, { "epoch": 0.2122226798216556, "grad_norm": 1.351036548614502, "learning_rate": 0.00018308528871393138, "loss": 1.3402, "step": 5926 }, { "epoch": 0.21225849195122387, "grad_norm": 1.6709541082382202, "learning_rate": 0.00018307883339574153, "loss": 1.544, "step": 5927 }, { "epoch": 0.21229430408079217, "grad_norm": 3.2044754028320312, "learning_rate": 0.00018307237695983314, "loss": 1.5532, "step": 5928 }, { "epoch": 0.21233011621036044, "grad_norm": 1.6904844045639038, "learning_rate": 0.00018306591940629307, "loss": 1.4901, "step": 5929 }, { "epoch": 0.21236592833992873, "grad_norm": 1.53884756565094, "learning_rate": 0.00018305946073520822, "loss": 1.5167, "step": 5930 }, { "epoch": 0.21240174046949703, "grad_norm": 1.9038699865341187, "learning_rate": 0.00018305300094666543, "loss": 1.5806, "step": 5931 }, { "epoch": 0.2124375525990653, "grad_norm": 1.951897382736206, "learning_rate": 0.00018304654004075167, "loss": 1.7285, "step": 5932 }, { "epoch": 0.2124733647286336, "grad_norm": 1.620422124862671, "learning_rate": 0.0001830400780175538, "loss": 1.2822, "step": 5933 }, { "epoch": 0.21250917685820186, "grad_norm": 1.8646143674850464, "learning_rate": 0.00018303361487715883, "loss": 1.5526, "step": 5934 }, { "epoch": 0.21254498898777016, "grad_norm": 1.271231770515442, "learning_rate": 0.00018302715061965365, "loss": 1.9238, "step": 5935 }, { "epoch": 0.21258080111733843, "grad_norm": 1.5239760875701904, "learning_rate": 0.00018302068524512528, "loss": 1.327, "step": 5936 }, { "epoch": 0.21261661324690673, "grad_norm": 1.642659068107605, "learning_rate": 0.00018301421875366067, "loss": 1.6107, "step": 5937 }, { "epoch": 0.21265242537647502, "grad_norm": 1.5333079099655151, "learning_rate": 0.00018300775114534683, "loss": 1.6014, "step": 5938 }, { "epoch": 0.2126882375060433, "grad_norm": 1.676763892173767, "learning_rate": 0.00018300128242027078, "loss": 1.3634, "step": 5939 }, { "epoch": 0.2127240496356116, "grad_norm": 1.5230528116226196, "learning_rate": 0.00018299481257851952, "loss": 1.8291, "step": 5940 }, { "epoch": 0.21275986176517986, "grad_norm": 1.4611763954162598, "learning_rate": 0.00018298834162018012, "loss": 1.6594, "step": 5941 }, { "epoch": 0.21279567389474816, "grad_norm": 1.8587379455566406, "learning_rate": 0.00018298186954533962, "loss": 1.4631, "step": 5942 }, { "epoch": 0.21283148602431642, "grad_norm": 2.7046003341674805, "learning_rate": 0.00018297539635408512, "loss": 1.5129, "step": 5943 }, { "epoch": 0.21286729815388472, "grad_norm": 2.1789839267730713, "learning_rate": 0.00018296892204650367, "loss": 1.6845, "step": 5944 }, { "epoch": 0.21290311028345302, "grad_norm": 1.4094992876052856, "learning_rate": 0.00018296244662268241, "loss": 1.7135, "step": 5945 }, { "epoch": 0.2129389224130213, "grad_norm": 1.2647565603256226, "learning_rate": 0.00018295597008270847, "loss": 1.6716, "step": 5946 }, { "epoch": 0.21297473454258958, "grad_norm": 1.6723589897155762, "learning_rate": 0.00018294949242666895, "loss": 1.7836, "step": 5947 }, { "epoch": 0.21301054667215785, "grad_norm": 1.859749674797058, "learning_rate": 0.00018294301365465095, "loss": 1.6115, "step": 5948 }, { "epoch": 0.21304635880172615, "grad_norm": 1.2979680299758911, "learning_rate": 0.00018293653376674177, "loss": 1.3994, "step": 5949 }, { "epoch": 0.21308217093129442, "grad_norm": 1.4515433311462402, "learning_rate": 0.00018293005276302844, "loss": 1.3845, "step": 5950 }, { "epoch": 0.21311798306086271, "grad_norm": 1.7454895973205566, "learning_rate": 0.00018292357064359828, "loss": 1.7939, "step": 5951 }, { "epoch": 0.213153795190431, "grad_norm": 1.893728494644165, "learning_rate": 0.0001829170874085384, "loss": 1.5585, "step": 5952 }, { "epoch": 0.21318960731999928, "grad_norm": 1.5000876188278198, "learning_rate": 0.00018291060305793608, "loss": 1.524, "step": 5953 }, { "epoch": 0.21322541944956758, "grad_norm": 1.9762799739837646, "learning_rate": 0.00018290411759187855, "loss": 1.6497, "step": 5954 }, { "epoch": 0.21326123157913585, "grad_norm": 2.6302709579467773, "learning_rate": 0.00018289763101045302, "loss": 1.6087, "step": 5955 }, { "epoch": 0.21329704370870414, "grad_norm": 2.3435487747192383, "learning_rate": 0.00018289114331374685, "loss": 1.4392, "step": 5956 }, { "epoch": 0.2133328558382724, "grad_norm": 1.2770248651504517, "learning_rate": 0.00018288465450184722, "loss": 1.2857, "step": 5957 }, { "epoch": 0.2133686679678407, "grad_norm": 1.952136754989624, "learning_rate": 0.0001828781645748415, "loss": 1.5773, "step": 5958 }, { "epoch": 0.213404480097409, "grad_norm": 1.2606817483901978, "learning_rate": 0.00018287167353281698, "loss": 1.3636, "step": 5959 }, { "epoch": 0.21344029222697727, "grad_norm": 1.316763162612915, "learning_rate": 0.000182865181375861, "loss": 1.2342, "step": 5960 }, { "epoch": 0.21347610435654557, "grad_norm": 1.371982216835022, "learning_rate": 0.0001828586881040609, "loss": 1.6322, "step": 5961 }, { "epoch": 0.21351191648611384, "grad_norm": 1.4569306373596191, "learning_rate": 0.00018285219371750398, "loss": 1.389, "step": 5962 }, { "epoch": 0.21354772861568214, "grad_norm": 1.64267897605896, "learning_rate": 0.0001828456982162777, "loss": 1.734, "step": 5963 }, { "epoch": 0.2135835407452504, "grad_norm": 2.057509660720825, "learning_rate": 0.0001828392016004694, "loss": 1.8128, "step": 5964 }, { "epoch": 0.2136193528748187, "grad_norm": 1.5392036437988281, "learning_rate": 0.00018283270387016654, "loss": 1.5686, "step": 5965 }, { "epoch": 0.213655165004387, "grad_norm": 2.590017557144165, "learning_rate": 0.00018282620502545647, "loss": 1.8044, "step": 5966 }, { "epoch": 0.21369097713395527, "grad_norm": 1.446028709411621, "learning_rate": 0.00018281970506642663, "loss": 1.3209, "step": 5967 }, { "epoch": 0.21372678926352356, "grad_norm": 2.1150851249694824, "learning_rate": 0.0001828132039931645, "loss": 1.6727, "step": 5968 }, { "epoch": 0.21376260139309183, "grad_norm": 1.4321050643920898, "learning_rate": 0.00018280670180575754, "loss": 1.4389, "step": 5969 }, { "epoch": 0.21379841352266013, "grad_norm": 1.3660449981689453, "learning_rate": 0.00018280019850429321, "loss": 1.4298, "step": 5970 }, { "epoch": 0.2138342256522284, "grad_norm": 1.6467535495758057, "learning_rate": 0.000182793694088859, "loss": 1.4494, "step": 5971 }, { "epoch": 0.2138700377817967, "grad_norm": 2.1352715492248535, "learning_rate": 0.00018278718855954247, "loss": 1.7657, "step": 5972 }, { "epoch": 0.213905849911365, "grad_norm": 2.0386393070220947, "learning_rate": 0.00018278068191643107, "loss": 1.709, "step": 5973 }, { "epoch": 0.21394166204093326, "grad_norm": 1.3228604793548584, "learning_rate": 0.0001827741741596124, "loss": 1.4663, "step": 5974 }, { "epoch": 0.21397747417050156, "grad_norm": 2.2697250843048096, "learning_rate": 0.00018276766528917398, "loss": 1.8581, "step": 5975 }, { "epoch": 0.21401328630006983, "grad_norm": 1.8088141679763794, "learning_rate": 0.00018276115530520336, "loss": 1.7364, "step": 5976 }, { "epoch": 0.21404909842963812, "grad_norm": 1.4815630912780762, "learning_rate": 0.0001827546442077882, "loss": 1.3892, "step": 5977 }, { "epoch": 0.2140849105592064, "grad_norm": 1.7200106382369995, "learning_rate": 0.000182748131997016, "loss": 1.4714, "step": 5978 }, { "epoch": 0.2141207226887747, "grad_norm": 1.388688087463379, "learning_rate": 0.00018274161867297447, "loss": 1.7146, "step": 5979 }, { "epoch": 0.214156534818343, "grad_norm": 1.3790088891983032, "learning_rate": 0.00018273510423575117, "loss": 1.5785, "step": 5980 }, { "epoch": 0.21419234694791126, "grad_norm": 1.8050322532653809, "learning_rate": 0.00018272858868543374, "loss": 1.3374, "step": 5981 }, { "epoch": 0.21422815907747955, "grad_norm": 1.851999282836914, "learning_rate": 0.00018272207202210986, "loss": 1.4933, "step": 5982 }, { "epoch": 0.21426397120704782, "grad_norm": 1.7498927116394043, "learning_rate": 0.00018271555424586723, "loss": 1.8221, "step": 5983 }, { "epoch": 0.21429978333661612, "grad_norm": 1.9699684381484985, "learning_rate": 0.0001827090353567935, "loss": 1.563, "step": 5984 }, { "epoch": 0.2143355954661844, "grad_norm": 1.985451102256775, "learning_rate": 0.0001827025153549764, "loss": 1.8659, "step": 5985 }, { "epoch": 0.21437140759575268, "grad_norm": 1.738781213760376, "learning_rate": 0.00018269599424050362, "loss": 1.582, "step": 5986 }, { "epoch": 0.21440721972532098, "grad_norm": 1.600334644317627, "learning_rate": 0.00018268947201346291, "loss": 1.6137, "step": 5987 }, { "epoch": 0.21444303185488925, "grad_norm": 1.34315824508667, "learning_rate": 0.00018268294867394204, "loss": 1.4509, "step": 5988 }, { "epoch": 0.21447884398445755, "grad_norm": 2.184556007385254, "learning_rate": 0.00018267642422202873, "loss": 1.7843, "step": 5989 }, { "epoch": 0.21451465611402581, "grad_norm": 1.9627654552459717, "learning_rate": 0.00018266989865781076, "loss": 1.7417, "step": 5990 }, { "epoch": 0.2145504682435941, "grad_norm": 1.7299060821533203, "learning_rate": 0.00018266337198137594, "loss": 1.652, "step": 5991 }, { "epoch": 0.21458628037316238, "grad_norm": 1.5557527542114258, "learning_rate": 0.00018265684419281213, "loss": 1.6432, "step": 5992 }, { "epoch": 0.21462209250273068, "grad_norm": 1.7059578895568848, "learning_rate": 0.00018265031529220705, "loss": 1.456, "step": 5993 }, { "epoch": 0.21465790463229895, "grad_norm": 1.6850507259368896, "learning_rate": 0.0001826437852796486, "loss": 1.5577, "step": 5994 }, { "epoch": 0.21469371676186724, "grad_norm": 2.0986411571502686, "learning_rate": 0.00018263725415522462, "loss": 1.5896, "step": 5995 }, { "epoch": 0.21472952889143554, "grad_norm": 2.174347400665283, "learning_rate": 0.000182630721919023, "loss": 1.6033, "step": 5996 }, { "epoch": 0.2147653410210038, "grad_norm": 1.433610439300537, "learning_rate": 0.00018262418857113157, "loss": 1.6622, "step": 5997 }, { "epoch": 0.2148011531505721, "grad_norm": 1.3677752017974854, "learning_rate": 0.00018261765411163827, "loss": 1.6868, "step": 5998 }, { "epoch": 0.21483696528014037, "grad_norm": 1.4604482650756836, "learning_rate": 0.000182611118540631, "loss": 1.3887, "step": 5999 }, { "epoch": 0.21487277740970867, "grad_norm": 1.462753415107727, "learning_rate": 0.00018260458185819772, "loss": 1.5631, "step": 6000 }, { "epoch": 0.21490858953927694, "grad_norm": 1.279866099357605, "learning_rate": 0.00018259804406442633, "loss": 1.3168, "step": 6001 }, { "epoch": 0.21494440166884524, "grad_norm": 2.1085100173950195, "learning_rate": 0.0001825915051594048, "loss": 1.8449, "step": 6002 }, { "epoch": 0.21498021379841353, "grad_norm": 1.6899045705795288, "learning_rate": 0.0001825849651432211, "loss": 1.4615, "step": 6003 }, { "epoch": 0.2150160259279818, "grad_norm": 1.6363755464553833, "learning_rate": 0.0001825784240159632, "loss": 1.4947, "step": 6004 }, { "epoch": 0.2150518380575501, "grad_norm": 1.7822414636611938, "learning_rate": 0.00018257188177771914, "loss": 1.579, "step": 6005 }, { "epoch": 0.21508765018711837, "grad_norm": 2.806736469268799, "learning_rate": 0.00018256533842857695, "loss": 1.4047, "step": 6006 }, { "epoch": 0.21512346231668666, "grad_norm": 1.5889925956726074, "learning_rate": 0.0001825587939686246, "loss": 1.563, "step": 6007 }, { "epoch": 0.21515927444625493, "grad_norm": 1.2758655548095703, "learning_rate": 0.00018255224839795018, "loss": 1.6386, "step": 6008 }, { "epoch": 0.21519508657582323, "grad_norm": 1.890386939048767, "learning_rate": 0.00018254570171664174, "loss": 1.4298, "step": 6009 }, { "epoch": 0.21523089870539153, "grad_norm": 1.240126609802246, "learning_rate": 0.00018253915392478737, "loss": 1.4986, "step": 6010 }, { "epoch": 0.2152667108349598, "grad_norm": 2.025054454803467, "learning_rate": 0.00018253260502247513, "loss": 1.7933, "step": 6011 }, { "epoch": 0.2153025229645281, "grad_norm": 1.7431395053863525, "learning_rate": 0.00018252605500979316, "loss": 1.8384, "step": 6012 }, { "epoch": 0.21533833509409636, "grad_norm": 1.3581688404083252, "learning_rate": 0.00018251950388682958, "loss": 1.4347, "step": 6013 }, { "epoch": 0.21537414722366466, "grad_norm": 1.4468944072723389, "learning_rate": 0.0001825129516536725, "loss": 1.5021, "step": 6014 }, { "epoch": 0.21540995935323293, "grad_norm": 2.2720768451690674, "learning_rate": 0.0001825063983104101, "loss": 1.7324, "step": 6015 }, { "epoch": 0.21544577148280122, "grad_norm": 2.472119092941284, "learning_rate": 0.00018249984385713055, "loss": 1.905, "step": 6016 }, { "epoch": 0.21548158361236952, "grad_norm": 1.8467071056365967, "learning_rate": 0.000182493288293922, "loss": 1.4867, "step": 6017 }, { "epoch": 0.2155173957419378, "grad_norm": 1.5712488889694214, "learning_rate": 0.00018248673162087268, "loss": 1.5598, "step": 6018 }, { "epoch": 0.2155532078715061, "grad_norm": 1.773179054260254, "learning_rate": 0.00018248017383807076, "loss": 1.539, "step": 6019 }, { "epoch": 0.21558902000107436, "grad_norm": 1.6343525648117065, "learning_rate": 0.0001824736149456045, "loss": 1.6218, "step": 6020 }, { "epoch": 0.21562483213064265, "grad_norm": 1.5364235639572144, "learning_rate": 0.00018246705494356214, "loss": 1.3762, "step": 6021 }, { "epoch": 0.21566064426021092, "grad_norm": 1.940355896949768, "learning_rate": 0.00018246049383203192, "loss": 1.4093, "step": 6022 }, { "epoch": 0.21569645638977922, "grad_norm": 1.5513060092926025, "learning_rate": 0.00018245393161110215, "loss": 1.0682, "step": 6023 }, { "epoch": 0.21573226851934751, "grad_norm": 1.6082549095153809, "learning_rate": 0.00018244736828086107, "loss": 1.2319, "step": 6024 }, { "epoch": 0.21576808064891578, "grad_norm": 1.8053920269012451, "learning_rate": 0.00018244080384139698, "loss": 1.7213, "step": 6025 }, { "epoch": 0.21580389277848408, "grad_norm": 1.4318716526031494, "learning_rate": 0.00018243423829279824, "loss": 1.5302, "step": 6026 }, { "epoch": 0.21583970490805235, "grad_norm": 1.3476557731628418, "learning_rate": 0.00018242767163515318, "loss": 1.3816, "step": 6027 }, { "epoch": 0.21587551703762065, "grad_norm": 2.339296817779541, "learning_rate": 0.00018242110386855007, "loss": 1.2601, "step": 6028 }, { "epoch": 0.21591132916718891, "grad_norm": 1.4866342544555664, "learning_rate": 0.00018241453499307734, "loss": 1.4542, "step": 6029 }, { "epoch": 0.2159471412967572, "grad_norm": 2.2102510929107666, "learning_rate": 0.00018240796500882338, "loss": 1.8524, "step": 6030 }, { "epoch": 0.2159829534263255, "grad_norm": 1.8730422258377075, "learning_rate": 0.0001824013939158765, "loss": 1.7469, "step": 6031 }, { "epoch": 0.21601876555589378, "grad_norm": 1.2295218706130981, "learning_rate": 0.0001823948217143252, "loss": 1.5667, "step": 6032 }, { "epoch": 0.21605457768546207, "grad_norm": 1.4555342197418213, "learning_rate": 0.00018238824840425785, "loss": 1.3013, "step": 6033 }, { "epoch": 0.21609038981503034, "grad_norm": 2.010502576828003, "learning_rate": 0.00018238167398576286, "loss": 1.5193, "step": 6034 }, { "epoch": 0.21612620194459864, "grad_norm": 2.1535098552703857, "learning_rate": 0.00018237509845892873, "loss": 1.6717, "step": 6035 }, { "epoch": 0.2161620140741669, "grad_norm": 1.6601369380950928, "learning_rate": 0.00018236852182384393, "loss": 1.5295, "step": 6036 }, { "epoch": 0.2161978262037352, "grad_norm": 1.641852617263794, "learning_rate": 0.00018236194408059685, "loss": 1.1745, "step": 6037 }, { "epoch": 0.2162336383333035, "grad_norm": 1.5132910013198853, "learning_rate": 0.00018235536522927611, "loss": 1.6211, "step": 6038 }, { "epoch": 0.21626945046287177, "grad_norm": 1.4342702627182007, "learning_rate": 0.00018234878526997015, "loss": 1.371, "step": 6039 }, { "epoch": 0.21630526259244007, "grad_norm": 1.345091462135315, "learning_rate": 0.0001823422042027675, "loss": 1.5545, "step": 6040 }, { "epoch": 0.21634107472200834, "grad_norm": 2.147703170776367, "learning_rate": 0.0001823356220277567, "loss": 1.5573, "step": 6041 }, { "epoch": 0.21637688685157663, "grad_norm": 1.943217396736145, "learning_rate": 0.00018232903874502632, "loss": 1.3352, "step": 6042 }, { "epoch": 0.2164126989811449, "grad_norm": 1.6899324655532837, "learning_rate": 0.00018232245435466493, "loss": 1.4203, "step": 6043 }, { "epoch": 0.2164485111107132, "grad_norm": 1.9825857877731323, "learning_rate": 0.0001823158688567611, "loss": 1.591, "step": 6044 }, { "epoch": 0.2164843232402815, "grad_norm": 1.5396236181259155, "learning_rate": 0.00018230928225140342, "loss": 1.8448, "step": 6045 }, { "epoch": 0.21652013536984976, "grad_norm": 1.9984638690948486, "learning_rate": 0.00018230269453868052, "loss": 1.417, "step": 6046 }, { "epoch": 0.21655594749941806, "grad_norm": 1.8226370811462402, "learning_rate": 0.00018229610571868102, "loss": 2.0727, "step": 6047 }, { "epoch": 0.21659175962898633, "grad_norm": 1.4168521165847778, "learning_rate": 0.0001822895157914936, "loss": 1.7747, "step": 6048 }, { "epoch": 0.21662757175855463, "grad_norm": 1.5113242864608765, "learning_rate": 0.00018228292475720687, "loss": 1.7142, "step": 6049 }, { "epoch": 0.2166633838881229, "grad_norm": 1.5562756061553955, "learning_rate": 0.00018227633261590955, "loss": 1.6209, "step": 6050 }, { "epoch": 0.2166991960176912, "grad_norm": 1.5761768817901611, "learning_rate": 0.00018226973936769027, "loss": 1.6547, "step": 6051 }, { "epoch": 0.2167350081472595, "grad_norm": 1.78731369972229, "learning_rate": 0.0001822631450126378, "loss": 1.3805, "step": 6052 }, { "epoch": 0.21677082027682776, "grad_norm": 1.4102073907852173, "learning_rate": 0.00018225654955084079, "loss": 1.562, "step": 6053 }, { "epoch": 0.21680663240639605, "grad_norm": 1.5794233083724976, "learning_rate": 0.00018224995298238804, "loss": 1.6456, "step": 6054 }, { "epoch": 0.21684244453596432, "grad_norm": 1.8132075071334839, "learning_rate": 0.00018224335530736825, "loss": 1.9961, "step": 6055 }, { "epoch": 0.21687825666553262, "grad_norm": 1.6012133359909058, "learning_rate": 0.0001822367565258702, "loss": 1.7669, "step": 6056 }, { "epoch": 0.2169140687951009, "grad_norm": 1.3363044261932373, "learning_rate": 0.0001822301566379827, "loss": 1.6712, "step": 6057 }, { "epoch": 0.2169498809246692, "grad_norm": 1.529463291168213, "learning_rate": 0.00018222355564379448, "loss": 1.3612, "step": 6058 }, { "epoch": 0.21698569305423748, "grad_norm": 1.4082953929901123, "learning_rate": 0.00018221695354339435, "loss": 1.4312, "step": 6059 }, { "epoch": 0.21702150518380575, "grad_norm": 1.7950927019119263, "learning_rate": 0.00018221035033687123, "loss": 1.5232, "step": 6060 }, { "epoch": 0.21705731731337405, "grad_norm": 1.8718847036361694, "learning_rate": 0.00018220374602431386, "loss": 1.7111, "step": 6061 }, { "epoch": 0.21709312944294232, "grad_norm": 1.4784345626831055, "learning_rate": 0.0001821971406058111, "loss": 1.6418, "step": 6062 }, { "epoch": 0.21712894157251061, "grad_norm": 1.6133242845535278, "learning_rate": 0.00018219053408145185, "loss": 1.3773, "step": 6063 }, { "epoch": 0.21716475370207888, "grad_norm": 1.6696078777313232, "learning_rate": 0.000182183926451325, "loss": 1.6862, "step": 6064 }, { "epoch": 0.21720056583164718, "grad_norm": 1.7715810537338257, "learning_rate": 0.00018217731771551942, "loss": 1.734, "step": 6065 }, { "epoch": 0.21723637796121548, "grad_norm": 1.6767003536224365, "learning_rate": 0.00018217070787412404, "loss": 1.4907, "step": 6066 }, { "epoch": 0.21727219009078375, "grad_norm": 1.7085485458374023, "learning_rate": 0.00018216409692722779, "loss": 1.8883, "step": 6067 }, { "epoch": 0.21730800222035204, "grad_norm": 1.3934434652328491, "learning_rate": 0.00018215748487491958, "loss": 1.3403, "step": 6068 }, { "epoch": 0.2173438143499203, "grad_norm": 2.090029001235962, "learning_rate": 0.00018215087171728837, "loss": 1.7042, "step": 6069 }, { "epoch": 0.2173796264794886, "grad_norm": 2.287304639816284, "learning_rate": 0.00018214425745442317, "loss": 1.7766, "step": 6070 }, { "epoch": 0.21741543860905688, "grad_norm": 1.7092816829681396, "learning_rate": 0.00018213764208641292, "loss": 1.4644, "step": 6071 }, { "epoch": 0.21745125073862517, "grad_norm": 1.7535308599472046, "learning_rate": 0.00018213102561334668, "loss": 1.39, "step": 6072 }, { "epoch": 0.21748706286819347, "grad_norm": 2.193551778793335, "learning_rate": 0.00018212440803531342, "loss": 1.5143, "step": 6073 }, { "epoch": 0.21752287499776174, "grad_norm": 1.7536066770553589, "learning_rate": 0.00018211778935240219, "loss": 1.617, "step": 6074 }, { "epoch": 0.21755868712733004, "grad_norm": 1.5110563039779663, "learning_rate": 0.00018211116956470203, "loss": 1.6147, "step": 6075 }, { "epoch": 0.2175944992568983, "grad_norm": 1.4865481853485107, "learning_rate": 0.00018210454867230195, "loss": 1.6124, "step": 6076 }, { "epoch": 0.2176303113864666, "grad_norm": 1.9881222248077393, "learning_rate": 0.00018209792667529112, "loss": 1.5487, "step": 6077 }, { "epoch": 0.21766612351603487, "grad_norm": 1.733006477355957, "learning_rate": 0.00018209130357375858, "loss": 1.7815, "step": 6078 }, { "epoch": 0.21770193564560317, "grad_norm": 1.2904987335205078, "learning_rate": 0.00018208467936779347, "loss": 1.8821, "step": 6079 }, { "epoch": 0.21773774777517146, "grad_norm": 2.602548837661743, "learning_rate": 0.00018207805405748482, "loss": 1.4122, "step": 6080 }, { "epoch": 0.21777355990473973, "grad_norm": 1.7389355897903442, "learning_rate": 0.00018207142764292187, "loss": 1.4373, "step": 6081 }, { "epoch": 0.21780937203430803, "grad_norm": 1.316788911819458, "learning_rate": 0.00018206480012419372, "loss": 1.6428, "step": 6082 }, { "epoch": 0.2178451841638763, "grad_norm": 1.8363261222839355, "learning_rate": 0.0001820581715013895, "loss": 1.4394, "step": 6083 }, { "epoch": 0.2178809962934446, "grad_norm": 1.57510507106781, "learning_rate": 0.0001820515417745985, "loss": 1.4141, "step": 6084 }, { "epoch": 0.21791680842301286, "grad_norm": 1.3749953508377075, "learning_rate": 0.0001820449109439098, "loss": 1.5511, "step": 6085 }, { "epoch": 0.21795262055258116, "grad_norm": 1.5707284212112427, "learning_rate": 0.00018203827900941264, "loss": 1.7035, "step": 6086 }, { "epoch": 0.21798843268214946, "grad_norm": 1.7157187461853027, "learning_rate": 0.0001820316459711963, "loss": 1.599, "step": 6087 }, { "epoch": 0.21802424481171773, "grad_norm": 1.5144253969192505, "learning_rate": 0.0001820250118293499, "loss": 1.9309, "step": 6088 }, { "epoch": 0.21806005694128602, "grad_norm": 1.6175367832183838, "learning_rate": 0.00018201837658396287, "loss": 1.6032, "step": 6089 }, { "epoch": 0.2180958690708543, "grad_norm": 1.6035107374191284, "learning_rate": 0.00018201174023512433, "loss": 1.4334, "step": 6090 }, { "epoch": 0.2181316812004226, "grad_norm": 1.8502047061920166, "learning_rate": 0.0001820051027829236, "loss": 1.354, "step": 6091 }, { "epoch": 0.21816749332999086, "grad_norm": 1.380337119102478, "learning_rate": 0.00018199846422745002, "loss": 1.6663, "step": 6092 }, { "epoch": 0.21820330545955915, "grad_norm": 1.2796369791030884, "learning_rate": 0.00018199182456879286, "loss": 1.5156, "step": 6093 }, { "epoch": 0.21823911758912742, "grad_norm": 1.8583347797393799, "learning_rate": 0.00018198518380704143, "loss": 1.896, "step": 6094 }, { "epoch": 0.21827492971869572, "grad_norm": 1.7820069789886475, "learning_rate": 0.00018197854194228517, "loss": 1.8372, "step": 6095 }, { "epoch": 0.21831074184826402, "grad_norm": 2.359118938446045, "learning_rate": 0.00018197189897461332, "loss": 1.5367, "step": 6096 }, { "epoch": 0.2183465539778323, "grad_norm": 1.677788496017456, "learning_rate": 0.00018196525490411534, "loss": 1.5914, "step": 6097 }, { "epoch": 0.21838236610740058, "grad_norm": 1.7194525003433228, "learning_rate": 0.00018195860973088058, "loss": 1.6376, "step": 6098 }, { "epoch": 0.21841817823696885, "grad_norm": 1.8285008668899536, "learning_rate": 0.00018195196345499842, "loss": 1.8412, "step": 6099 }, { "epoch": 0.21845399036653715, "grad_norm": 2.399778366088867, "learning_rate": 0.00018194531607655833, "loss": 1.3773, "step": 6100 }, { "epoch": 0.21848980249610542, "grad_norm": 1.259798288345337, "learning_rate": 0.0001819386675956497, "loss": 1.3212, "step": 6101 }, { "epoch": 0.21852561462567371, "grad_norm": 1.758084774017334, "learning_rate": 0.000181932018012362, "loss": 1.6877, "step": 6102 }, { "epoch": 0.218561426755242, "grad_norm": 1.4051774740219116, "learning_rate": 0.00018192536732678468, "loss": 1.4873, "step": 6103 }, { "epoch": 0.21859723888481028, "grad_norm": 1.8246309757232666, "learning_rate": 0.00018191871553900718, "loss": 1.7631, "step": 6104 }, { "epoch": 0.21863305101437858, "grad_norm": 1.6398242712020874, "learning_rate": 0.00018191206264911908, "loss": 1.3941, "step": 6105 }, { "epoch": 0.21866886314394685, "grad_norm": 2.2544310092926025, "learning_rate": 0.0001819054086572098, "loss": 1.7397, "step": 6106 }, { "epoch": 0.21870467527351514, "grad_norm": 1.5466108322143555, "learning_rate": 0.00018189875356336893, "loss": 1.5326, "step": 6107 }, { "epoch": 0.2187404874030834, "grad_norm": 1.4698082208633423, "learning_rate": 0.00018189209736768595, "loss": 1.4511, "step": 6108 }, { "epoch": 0.2187762995326517, "grad_norm": 1.9529378414154053, "learning_rate": 0.00018188544007025043, "loss": 1.8635, "step": 6109 }, { "epoch": 0.21881211166222, "grad_norm": 1.4893909692764282, "learning_rate": 0.00018187878167115197, "loss": 1.6932, "step": 6110 }, { "epoch": 0.21884792379178827, "grad_norm": 1.3069435358047485, "learning_rate": 0.00018187212217048008, "loss": 1.5887, "step": 6111 }, { "epoch": 0.21888373592135657, "grad_norm": 1.8581095933914185, "learning_rate": 0.00018186546156832444, "loss": 1.7178, "step": 6112 }, { "epoch": 0.21891954805092484, "grad_norm": 1.3659560680389404, "learning_rate": 0.00018185879986477456, "loss": 1.2872, "step": 6113 }, { "epoch": 0.21895536018049314, "grad_norm": 1.5570614337921143, "learning_rate": 0.00018185213705992014, "loss": 1.5665, "step": 6114 }, { "epoch": 0.2189911723100614, "grad_norm": 1.6068415641784668, "learning_rate": 0.00018184547315385082, "loss": 1.869, "step": 6115 }, { "epoch": 0.2190269844396297, "grad_norm": 1.2946780920028687, "learning_rate": 0.0001818388081466562, "loss": 1.4014, "step": 6116 }, { "epoch": 0.219062796569198, "grad_norm": 1.327081322669983, "learning_rate": 0.00018183214203842601, "loss": 1.8673, "step": 6117 }, { "epoch": 0.21909860869876627, "grad_norm": 1.738240122795105, "learning_rate": 0.00018182547482924988, "loss": 1.406, "step": 6118 }, { "epoch": 0.21913442082833456, "grad_norm": 2.0041754245758057, "learning_rate": 0.00018181880651921755, "loss": 1.6267, "step": 6119 }, { "epoch": 0.21917023295790283, "grad_norm": 1.6936300992965698, "learning_rate": 0.0001818121371084187, "loss": 1.741, "step": 6120 }, { "epoch": 0.21920604508747113, "grad_norm": 1.2888492345809937, "learning_rate": 0.00018180546659694307, "loss": 1.5933, "step": 6121 }, { "epoch": 0.2192418572170394, "grad_norm": 1.3462530374526978, "learning_rate": 0.0001817987949848804, "loss": 1.6054, "step": 6122 }, { "epoch": 0.2192776693466077, "grad_norm": 2.644855260848999, "learning_rate": 0.0001817921222723205, "loss": 1.7208, "step": 6123 }, { "epoch": 0.219313481476176, "grad_norm": 1.3023197650909424, "learning_rate": 0.00018178544845935308, "loss": 1.6019, "step": 6124 }, { "epoch": 0.21934929360574426, "grad_norm": 1.9801610708236694, "learning_rate": 0.00018177877354606797, "loss": 1.4413, "step": 6125 }, { "epoch": 0.21938510573531256, "grad_norm": 1.6524903774261475, "learning_rate": 0.00018177209753255492, "loss": 1.4086, "step": 6126 }, { "epoch": 0.21942091786488083, "grad_norm": 1.5688244104385376, "learning_rate": 0.00018176542041890376, "loss": 1.5965, "step": 6127 }, { "epoch": 0.21945672999444912, "grad_norm": 1.5412373542785645, "learning_rate": 0.00018175874220520438, "loss": 1.4583, "step": 6128 }, { "epoch": 0.2194925421240174, "grad_norm": 1.9981822967529297, "learning_rate": 0.00018175206289154655, "loss": 1.5695, "step": 6129 }, { "epoch": 0.2195283542535857, "grad_norm": 1.5729684829711914, "learning_rate": 0.00018174538247802015, "loss": 1.1199, "step": 6130 }, { "epoch": 0.21956416638315399, "grad_norm": 1.5781065225601196, "learning_rate": 0.00018173870096471512, "loss": 1.7226, "step": 6131 }, { "epoch": 0.21959997851272225, "grad_norm": 1.6586942672729492, "learning_rate": 0.00018173201835172128, "loss": 1.8269, "step": 6132 }, { "epoch": 0.21963579064229055, "grad_norm": 1.3786426782608032, "learning_rate": 0.00018172533463912857, "loss": 1.4942, "step": 6133 }, { "epoch": 0.21967160277185882, "grad_norm": 1.72449791431427, "learning_rate": 0.00018171864982702692, "loss": 1.2589, "step": 6134 }, { "epoch": 0.21970741490142712, "grad_norm": 1.6442193984985352, "learning_rate": 0.0001817119639155062, "loss": 1.3379, "step": 6135 }, { "epoch": 0.21974322703099539, "grad_norm": 1.8619656562805176, "learning_rate": 0.00018170527690465643, "loss": 1.5929, "step": 6136 }, { "epoch": 0.21977903916056368, "grad_norm": 1.7643412351608276, "learning_rate": 0.00018169858879456757, "loss": 1.5387, "step": 6137 }, { "epoch": 0.21981485129013198, "grad_norm": 1.723496437072754, "learning_rate": 0.00018169189958532953, "loss": 1.6536, "step": 6138 }, { "epoch": 0.21985066341970025, "grad_norm": 2.1918203830718994, "learning_rate": 0.0001816852092770324, "loss": 1.6178, "step": 6139 }, { "epoch": 0.21988647554926855, "grad_norm": 1.7967193126678467, "learning_rate": 0.00018167851786976612, "loss": 1.4998, "step": 6140 }, { "epoch": 0.21992228767883681, "grad_norm": 1.3858860731124878, "learning_rate": 0.00018167182536362074, "loss": 1.4789, "step": 6141 }, { "epoch": 0.2199580998084051, "grad_norm": 1.7353756427764893, "learning_rate": 0.00018166513175868633, "loss": 1.4175, "step": 6142 }, { "epoch": 0.21999391193797338, "grad_norm": 1.6364984512329102, "learning_rate": 0.0001816584370550529, "loss": 1.2205, "step": 6143 }, { "epoch": 0.22002972406754168, "grad_norm": 2.147890567779541, "learning_rate": 0.00018165174125281053, "loss": 1.4915, "step": 6144 }, { "epoch": 0.22006553619710997, "grad_norm": 1.781660556793213, "learning_rate": 0.0001816450443520493, "loss": 1.4868, "step": 6145 }, { "epoch": 0.22010134832667824, "grad_norm": 1.9566551446914673, "learning_rate": 0.00018163834635285931, "loss": 1.2187, "step": 6146 }, { "epoch": 0.22013716045624654, "grad_norm": 1.7707067728042603, "learning_rate": 0.00018163164725533068, "loss": 1.7138, "step": 6147 }, { "epoch": 0.2201729725858148, "grad_norm": 3.547220468521118, "learning_rate": 0.0001816249470595535, "loss": 1.4808, "step": 6148 }, { "epoch": 0.2202087847153831, "grad_norm": 2.155532121658325, "learning_rate": 0.000181618245765618, "loss": 1.5946, "step": 6149 }, { "epoch": 0.22024459684495137, "grad_norm": 2.2041211128234863, "learning_rate": 0.00018161154337361426, "loss": 1.5524, "step": 6150 }, { "epoch": 0.22028040897451967, "grad_norm": 1.5323847532272339, "learning_rate": 0.00018160483988363248, "loss": 1.6958, "step": 6151 }, { "epoch": 0.22031622110408797, "grad_norm": 1.5857058763504028, "learning_rate": 0.00018159813529576284, "loss": 1.9383, "step": 6152 }, { "epoch": 0.22035203323365624, "grad_norm": 1.6870907545089722, "learning_rate": 0.0001815914296100955, "loss": 1.7842, "step": 6153 }, { "epoch": 0.22038784536322453, "grad_norm": 1.9299302101135254, "learning_rate": 0.00018158472282672078, "loss": 1.8147, "step": 6154 }, { "epoch": 0.2204236574927928, "grad_norm": 1.6161481142044067, "learning_rate": 0.00018157801494572885, "loss": 1.6953, "step": 6155 }, { "epoch": 0.2204594696223611, "grad_norm": 1.6391772031784058, "learning_rate": 0.00018157130596720996, "loss": 1.6505, "step": 6156 }, { "epoch": 0.22049528175192937, "grad_norm": 1.602982759475708, "learning_rate": 0.0001815645958912543, "loss": 1.3066, "step": 6157 }, { "epoch": 0.22053109388149766, "grad_norm": 2.5656521320343018, "learning_rate": 0.0001815578847179523, "loss": 1.6071, "step": 6158 }, { "epoch": 0.22056690601106596, "grad_norm": 1.99832284450531, "learning_rate": 0.0001815511724473941, "loss": 1.7087, "step": 6159 }, { "epoch": 0.22060271814063423, "grad_norm": 2.7880725860595703, "learning_rate": 0.0001815444590796701, "loss": 1.4331, "step": 6160 }, { "epoch": 0.22063853027020253, "grad_norm": 2.317629337310791, "learning_rate": 0.00018153774461487058, "loss": 1.5764, "step": 6161 }, { "epoch": 0.2206743423997708, "grad_norm": 1.6050294637680054, "learning_rate": 0.00018153102905308589, "loss": 1.8059, "step": 6162 }, { "epoch": 0.2207101545293391, "grad_norm": 1.231059193611145, "learning_rate": 0.00018152431239440637, "loss": 1.6119, "step": 6163 }, { "epoch": 0.22074596665890736, "grad_norm": 1.5513490438461304, "learning_rate": 0.00018151759463892235, "loss": 1.6632, "step": 6164 }, { "epoch": 0.22078177878847566, "grad_norm": 1.990567684173584, "learning_rate": 0.00018151087578672427, "loss": 1.4703, "step": 6165 }, { "epoch": 0.22081759091804395, "grad_norm": 1.8540167808532715, "learning_rate": 0.00018150415583790253, "loss": 1.3144, "step": 6166 }, { "epoch": 0.22085340304761222, "grad_norm": 1.6363404989242554, "learning_rate": 0.00018149743479254745, "loss": 1.7437, "step": 6167 }, { "epoch": 0.22088921517718052, "grad_norm": 1.5769498348236084, "learning_rate": 0.00018149071265074955, "loss": 1.6028, "step": 6168 }, { "epoch": 0.2209250273067488, "grad_norm": 1.8275872468948364, "learning_rate": 0.0001814839894125992, "loss": 1.1498, "step": 6169 }, { "epoch": 0.22096083943631709, "grad_norm": 1.8236889839172363, "learning_rate": 0.0001814772650781869, "loss": 1.728, "step": 6170 }, { "epoch": 0.22099665156588535, "grad_norm": 2.3265621662139893, "learning_rate": 0.0001814705396476031, "loss": 1.3853, "step": 6171 }, { "epoch": 0.22103246369545365, "grad_norm": 1.8353328704833984, "learning_rate": 0.00018146381312093826, "loss": 1.526, "step": 6172 }, { "epoch": 0.22106827582502195, "grad_norm": 1.6437424421310425, "learning_rate": 0.00018145708549828287, "loss": 1.8824, "step": 6173 }, { "epoch": 0.22110408795459022, "grad_norm": 1.395546317100525, "learning_rate": 0.00018145035677972753, "loss": 1.7179, "step": 6174 }, { "epoch": 0.2211399000841585, "grad_norm": 1.4064130783081055, "learning_rate": 0.00018144362696536267, "loss": 1.5391, "step": 6175 }, { "epoch": 0.22117571221372678, "grad_norm": 2.1587369441986084, "learning_rate": 0.00018143689605527885, "loss": 1.1614, "step": 6176 }, { "epoch": 0.22121152434329508, "grad_norm": 1.8785380125045776, "learning_rate": 0.00018143016404956669, "loss": 1.1919, "step": 6177 }, { "epoch": 0.22124733647286335, "grad_norm": 1.5250529050827026, "learning_rate": 0.00018142343094831667, "loss": 1.7925, "step": 6178 }, { "epoch": 0.22128314860243165, "grad_norm": 1.775303840637207, "learning_rate": 0.0001814166967516194, "loss": 1.5447, "step": 6179 }, { "epoch": 0.22131896073199994, "grad_norm": 1.2775332927703857, "learning_rate": 0.00018140996145956552, "loss": 1.1843, "step": 6180 }, { "epoch": 0.2213547728615682, "grad_norm": 1.5202330350875854, "learning_rate": 0.00018140322507224563, "loss": 1.4731, "step": 6181 }, { "epoch": 0.2213905849911365, "grad_norm": 1.5258673429489136, "learning_rate": 0.00018139648758975032, "loss": 1.2282, "step": 6182 }, { "epoch": 0.22142639712070478, "grad_norm": 2.3224363327026367, "learning_rate": 0.00018138974901217027, "loss": 1.9237, "step": 6183 }, { "epoch": 0.22146220925027307, "grad_norm": 1.9350823163986206, "learning_rate": 0.00018138300933959615, "loss": 1.67, "step": 6184 }, { "epoch": 0.22149802137984134, "grad_norm": 1.8837072849273682, "learning_rate": 0.0001813762685721186, "loss": 1.4745, "step": 6185 }, { "epoch": 0.22153383350940964, "grad_norm": 1.769679307937622, "learning_rate": 0.00018136952670982833, "loss": 1.3859, "step": 6186 }, { "epoch": 0.22156964563897794, "grad_norm": 1.521981120109558, "learning_rate": 0.00018136278375281605, "loss": 1.5587, "step": 6187 }, { "epoch": 0.2216054577685462, "grad_norm": 1.510655403137207, "learning_rate": 0.00018135603970117242, "loss": 1.5694, "step": 6188 }, { "epoch": 0.2216412698981145, "grad_norm": 1.176619529724121, "learning_rate": 0.00018134929455498828, "loss": 1.48, "step": 6189 }, { "epoch": 0.22167708202768277, "grad_norm": 2.1085398197174072, "learning_rate": 0.0001813425483143543, "loss": 1.8069, "step": 6190 }, { "epoch": 0.22171289415725107, "grad_norm": 1.5834347009658813, "learning_rate": 0.00018133580097936123, "loss": 1.5686, "step": 6191 }, { "epoch": 0.22174870628681934, "grad_norm": 2.538158893585205, "learning_rate": 0.00018132905255009986, "loss": 1.6837, "step": 6192 }, { "epoch": 0.22178451841638763, "grad_norm": 1.5322290658950806, "learning_rate": 0.00018132230302666104, "loss": 1.6239, "step": 6193 }, { "epoch": 0.2218203305459559, "grad_norm": 2.453597068786621, "learning_rate": 0.0001813155524091355, "loss": 1.3409, "step": 6194 }, { "epoch": 0.2218561426755242, "grad_norm": 1.7916405200958252, "learning_rate": 0.00018130880069761412, "loss": 1.8466, "step": 6195 }, { "epoch": 0.2218919548050925, "grad_norm": 1.4588032960891724, "learning_rate": 0.00018130204789218769, "loss": 1.7948, "step": 6196 }, { "epoch": 0.22192776693466076, "grad_norm": 1.7840462923049927, "learning_rate": 0.00018129529399294706, "loss": 1.9397, "step": 6197 }, { "epoch": 0.22196357906422906, "grad_norm": 1.4719196557998657, "learning_rate": 0.00018128853899998312, "loss": 1.3483, "step": 6198 }, { "epoch": 0.22199939119379733, "grad_norm": 1.5863127708435059, "learning_rate": 0.00018128178291338678, "loss": 1.7581, "step": 6199 }, { "epoch": 0.22203520332336563, "grad_norm": 1.4045363664627075, "learning_rate": 0.00018127502573324887, "loss": 1.4722, "step": 6200 }, { "epoch": 0.2220710154529339, "grad_norm": 1.8057374954223633, "learning_rate": 0.00018126826745966032, "loss": 1.7084, "step": 6201 }, { "epoch": 0.2221068275825022, "grad_norm": 1.5089365243911743, "learning_rate": 0.00018126150809271208, "loss": 1.678, "step": 6202 }, { "epoch": 0.2221426397120705, "grad_norm": 1.7062714099884033, "learning_rate": 0.00018125474763249505, "loss": 1.7745, "step": 6203 }, { "epoch": 0.22217845184163876, "grad_norm": 2.236283540725708, "learning_rate": 0.00018124798607910018, "loss": 1.6298, "step": 6204 }, { "epoch": 0.22221426397120705, "grad_norm": 1.514384150505066, "learning_rate": 0.0001812412234326185, "loss": 1.5133, "step": 6205 }, { "epoch": 0.22225007610077532, "grad_norm": 1.520835041999817, "learning_rate": 0.00018123445969314095, "loss": 1.8003, "step": 6206 }, { "epoch": 0.22228588823034362, "grad_norm": 2.0588576793670654, "learning_rate": 0.00018122769486075854, "loss": 1.6132, "step": 6207 }, { "epoch": 0.2223217003599119, "grad_norm": 1.1865562200546265, "learning_rate": 0.00018122092893556224, "loss": 1.5413, "step": 6208 }, { "epoch": 0.22235751248948019, "grad_norm": 1.4218183755874634, "learning_rate": 0.0001812141619176431, "loss": 1.5644, "step": 6209 }, { "epoch": 0.22239332461904848, "grad_norm": 1.5924785137176514, "learning_rate": 0.00018120739380709218, "loss": 1.6539, "step": 6210 }, { "epoch": 0.22242913674861675, "grad_norm": 1.443805456161499, "learning_rate": 0.00018120062460400056, "loss": 1.3789, "step": 6211 }, { "epoch": 0.22246494887818505, "grad_norm": 1.639870285987854, "learning_rate": 0.00018119385430845925, "loss": 1.5284, "step": 6212 }, { "epoch": 0.22250076100775332, "grad_norm": 1.666635274887085, "learning_rate": 0.00018118708292055936, "loss": 1.8486, "step": 6213 }, { "epoch": 0.2225365731373216, "grad_norm": 2.6075167655944824, "learning_rate": 0.00018118031044039198, "loss": 1.778, "step": 6214 }, { "epoch": 0.22257238526688988, "grad_norm": 1.558547019958496, "learning_rate": 0.00018117353686804825, "loss": 1.4225, "step": 6215 }, { "epoch": 0.22260819739645818, "grad_norm": 1.4020191431045532, "learning_rate": 0.00018116676220361933, "loss": 1.5796, "step": 6216 }, { "epoch": 0.22264400952602648, "grad_norm": 2.120662212371826, "learning_rate": 0.00018115998644719627, "loss": 1.9943, "step": 6217 }, { "epoch": 0.22267982165559475, "grad_norm": 1.3515700101852417, "learning_rate": 0.0001811532095988703, "loss": 1.3811, "step": 6218 }, { "epoch": 0.22271563378516304, "grad_norm": 1.45658278465271, "learning_rate": 0.00018114643165873258, "loss": 1.5035, "step": 6219 }, { "epoch": 0.2227514459147313, "grad_norm": 1.6097304821014404, "learning_rate": 0.00018113965262687426, "loss": 1.524, "step": 6220 }, { "epoch": 0.2227872580442996, "grad_norm": 1.5943652391433716, "learning_rate": 0.00018113287250338662, "loss": 1.3009, "step": 6221 }, { "epoch": 0.22282307017386788, "grad_norm": 1.5684243440628052, "learning_rate": 0.0001811260912883608, "loss": 1.4074, "step": 6222 }, { "epoch": 0.22285888230343617, "grad_norm": 1.7942955493927002, "learning_rate": 0.0001811193089818881, "loss": 1.5083, "step": 6223 }, { "epoch": 0.22289469443300447, "grad_norm": 2.2910614013671875, "learning_rate": 0.0001811125255840597, "loss": 1.6178, "step": 6224 }, { "epoch": 0.22293050656257274, "grad_norm": 1.332371473312378, "learning_rate": 0.00018110574109496692, "loss": 1.4492, "step": 6225 }, { "epoch": 0.22296631869214104, "grad_norm": 1.8471901416778564, "learning_rate": 0.000181098955514701, "loss": 1.9102, "step": 6226 }, { "epoch": 0.2230021308217093, "grad_norm": 1.6000697612762451, "learning_rate": 0.00018109216884335325, "loss": 1.6102, "step": 6227 }, { "epoch": 0.2230379429512776, "grad_norm": 2.0699243545532227, "learning_rate": 0.00018108538108101496, "loss": 1.8961, "step": 6228 }, { "epoch": 0.22307375508084587, "grad_norm": 1.5482474565505981, "learning_rate": 0.00018107859222777747, "loss": 1.6179, "step": 6229 }, { "epoch": 0.22310956721041417, "grad_norm": 1.4630217552185059, "learning_rate": 0.0001810718022837321, "loss": 1.3147, "step": 6230 }, { "epoch": 0.22314537933998246, "grad_norm": 1.7212188243865967, "learning_rate": 0.00018106501124897024, "loss": 1.6115, "step": 6231 }, { "epoch": 0.22318119146955073, "grad_norm": 1.2010308504104614, "learning_rate": 0.00018105821912358318, "loss": 1.5784, "step": 6232 }, { "epoch": 0.22321700359911903, "grad_norm": 2.4285857677459717, "learning_rate": 0.00018105142590766235, "loss": 1.3719, "step": 6233 }, { "epoch": 0.2232528157286873, "grad_norm": 1.9801949262619019, "learning_rate": 0.00018104463160129912, "loss": 1.5422, "step": 6234 }, { "epoch": 0.2232886278582556, "grad_norm": 1.3408149480819702, "learning_rate": 0.00018103783620458495, "loss": 1.3496, "step": 6235 }, { "epoch": 0.22332443998782386, "grad_norm": 1.6955066919326782, "learning_rate": 0.0001810310397176112, "loss": 1.6563, "step": 6236 }, { "epoch": 0.22336025211739216, "grad_norm": 1.4549084901809692, "learning_rate": 0.0001810242421404693, "loss": 1.4026, "step": 6237 }, { "epoch": 0.22339606424696046, "grad_norm": 1.6285228729248047, "learning_rate": 0.00018101744347325078, "loss": 1.511, "step": 6238 }, { "epoch": 0.22343187637652873, "grad_norm": 1.6430824995040894, "learning_rate": 0.00018101064371604705, "loss": 1.3705, "step": 6239 }, { "epoch": 0.22346768850609702, "grad_norm": 2.0069923400878906, "learning_rate": 0.0001810038428689496, "loss": 1.7298, "step": 6240 }, { "epoch": 0.2235035006356653, "grad_norm": 1.5702707767486572, "learning_rate": 0.00018099704093204997, "loss": 1.7249, "step": 6241 }, { "epoch": 0.2235393127652336, "grad_norm": 1.7741975784301758, "learning_rate": 0.00018099023790543956, "loss": 1.6276, "step": 6242 }, { "epoch": 0.22357512489480186, "grad_norm": 2.8160290718078613, "learning_rate": 0.00018098343378921002, "loss": 1.3841, "step": 6243 }, { "epoch": 0.22361093702437015, "grad_norm": 1.9434565305709839, "learning_rate": 0.00018097662858345282, "loss": 1.2333, "step": 6244 }, { "epoch": 0.22364674915393845, "grad_norm": 1.4331356287002563, "learning_rate": 0.00018096982228825957, "loss": 1.6064, "step": 6245 }, { "epoch": 0.22368256128350672, "grad_norm": 1.8122775554656982, "learning_rate": 0.00018096301490372175, "loss": 1.5847, "step": 6246 }, { "epoch": 0.22371837341307502, "grad_norm": 1.7992037534713745, "learning_rate": 0.00018095620642993106, "loss": 1.5136, "step": 6247 }, { "epoch": 0.22375418554264329, "grad_norm": 2.3840503692626953, "learning_rate": 0.000180949396866979, "loss": 1.4274, "step": 6248 }, { "epoch": 0.22378999767221158, "grad_norm": 1.6450737714767456, "learning_rate": 0.00018094258621495724, "loss": 1.5595, "step": 6249 }, { "epoch": 0.22382580980177985, "grad_norm": 1.2827435731887817, "learning_rate": 0.00018093577447395737, "loss": 1.6185, "step": 6250 }, { "epoch": 0.22386162193134815, "grad_norm": 2.183157444000244, "learning_rate": 0.00018092896164407108, "loss": 1.3872, "step": 6251 }, { "epoch": 0.22389743406091644, "grad_norm": 2.581634521484375, "learning_rate": 0.00018092214772538994, "loss": 1.6593, "step": 6252 }, { "epoch": 0.2239332461904847, "grad_norm": 1.961072325706482, "learning_rate": 0.00018091533271800576, "loss": 1.5356, "step": 6253 }, { "epoch": 0.223969058320053, "grad_norm": 2.0827033519744873, "learning_rate": 0.00018090851662201011, "loss": 1.5634, "step": 6254 }, { "epoch": 0.22400487044962128, "grad_norm": 1.3981750011444092, "learning_rate": 0.00018090169943749476, "loss": 1.6067, "step": 6255 }, { "epoch": 0.22404068257918958, "grad_norm": 1.7544121742248535, "learning_rate": 0.00018089488116455137, "loss": 1.7618, "step": 6256 }, { "epoch": 0.22407649470875785, "grad_norm": 1.768568754196167, "learning_rate": 0.00018088806180327174, "loss": 1.7251, "step": 6257 }, { "epoch": 0.22411230683832614, "grad_norm": 1.7023133039474487, "learning_rate": 0.00018088124135374754, "loss": 1.3179, "step": 6258 }, { "epoch": 0.22414811896789444, "grad_norm": 1.566927433013916, "learning_rate": 0.00018087441981607056, "loss": 1.3815, "step": 6259 }, { "epoch": 0.2241839310974627, "grad_norm": 1.7557883262634277, "learning_rate": 0.00018086759719033261, "loss": 1.6312, "step": 6260 }, { "epoch": 0.224219743227031, "grad_norm": 1.8031694889068604, "learning_rate": 0.00018086077347662544, "loss": 1.3539, "step": 6261 }, { "epoch": 0.22425555535659927, "grad_norm": 1.501671552658081, "learning_rate": 0.00018085394867504087, "loss": 1.7447, "step": 6262 }, { "epoch": 0.22429136748616757, "grad_norm": 1.342667579650879, "learning_rate": 0.00018084712278567072, "loss": 1.5844, "step": 6263 }, { "epoch": 0.22432717961573584, "grad_norm": 2.0589170455932617, "learning_rate": 0.00018084029580860679, "loss": 2.0065, "step": 6264 }, { "epoch": 0.22436299174530414, "grad_norm": 1.7275969982147217, "learning_rate": 0.000180833467743941, "loss": 1.4656, "step": 6265 }, { "epoch": 0.22439880387487243, "grad_norm": 1.4489154815673828, "learning_rate": 0.00018082663859176514, "loss": 1.4036, "step": 6266 }, { "epoch": 0.2244346160044407, "grad_norm": 1.4381591081619263, "learning_rate": 0.00018081980835217115, "loss": 1.5086, "step": 6267 }, { "epoch": 0.224470428134009, "grad_norm": 1.5848140716552734, "learning_rate": 0.00018081297702525083, "loss": 1.88, "step": 6268 }, { "epoch": 0.22450624026357727, "grad_norm": 1.8268117904663086, "learning_rate": 0.0001808061446110962, "loss": 1.713, "step": 6269 }, { "epoch": 0.22454205239314556, "grad_norm": 2.428276538848877, "learning_rate": 0.0001807993111097991, "loss": 1.7823, "step": 6270 }, { "epoch": 0.22457786452271383, "grad_norm": 1.898605465888977, "learning_rate": 0.0001807924765214515, "loss": 1.4447, "step": 6271 }, { "epoch": 0.22461367665228213, "grad_norm": 1.346147894859314, "learning_rate": 0.00018078564084614534, "loss": 1.4593, "step": 6272 }, { "epoch": 0.22464948878185043, "grad_norm": 1.8311480283737183, "learning_rate": 0.0001807788040839726, "loss": 1.6737, "step": 6273 }, { "epoch": 0.2246853009114187, "grad_norm": 1.3992347717285156, "learning_rate": 0.0001807719662350252, "loss": 1.5412, "step": 6274 }, { "epoch": 0.224721113040987, "grad_norm": 1.5278624296188354, "learning_rate": 0.00018076512729939522, "loss": 1.6334, "step": 6275 }, { "epoch": 0.22475692517055526, "grad_norm": 1.8901277780532837, "learning_rate": 0.00018075828727717464, "loss": 1.8401, "step": 6276 }, { "epoch": 0.22479273730012356, "grad_norm": 2.278604745864868, "learning_rate": 0.00018075144616845544, "loss": 1.7158, "step": 6277 }, { "epoch": 0.22482854942969183, "grad_norm": 1.5036617517471313, "learning_rate": 0.00018074460397332973, "loss": 1.7371, "step": 6278 }, { "epoch": 0.22486436155926012, "grad_norm": 1.6316254138946533, "learning_rate": 0.00018073776069188954, "loss": 1.6364, "step": 6279 }, { "epoch": 0.22490017368882842, "grad_norm": 1.824449062347412, "learning_rate": 0.0001807309163242269, "loss": 1.2859, "step": 6280 }, { "epoch": 0.2249359858183967, "grad_norm": 1.523816466331482, "learning_rate": 0.0001807240708704339, "loss": 1.6968, "step": 6281 }, { "epoch": 0.22497179794796499, "grad_norm": 1.8160182237625122, "learning_rate": 0.0001807172243306027, "loss": 1.5923, "step": 6282 }, { "epoch": 0.22500761007753325, "grad_norm": 1.6295522451400757, "learning_rate": 0.00018071037670482532, "loss": 1.5968, "step": 6283 }, { "epoch": 0.22504342220710155, "grad_norm": 1.463212251663208, "learning_rate": 0.00018070352799319395, "loss": 1.5347, "step": 6284 }, { "epoch": 0.22507923433666982, "grad_norm": 2.812549114227295, "learning_rate": 0.0001806966781958007, "loss": 1.654, "step": 6285 }, { "epoch": 0.22511504646623812, "grad_norm": 2.011024236679077, "learning_rate": 0.00018068982731273773, "loss": 1.9352, "step": 6286 }, { "epoch": 0.2251508585958064, "grad_norm": 1.4859150648117065, "learning_rate": 0.00018068297534409725, "loss": 1.6877, "step": 6287 }, { "epoch": 0.22518667072537468, "grad_norm": 1.5548865795135498, "learning_rate": 0.00018067612228997137, "loss": 1.53, "step": 6288 }, { "epoch": 0.22522248285494298, "grad_norm": 1.7031159400939941, "learning_rate": 0.00018066926815045236, "loss": 1.8067, "step": 6289 }, { "epoch": 0.22525829498451125, "grad_norm": 1.9277985095977783, "learning_rate": 0.00018066241292563238, "loss": 1.3285, "step": 6290 }, { "epoch": 0.22529410711407954, "grad_norm": 1.4769515991210938, "learning_rate": 0.00018065555661560368, "loss": 1.6458, "step": 6291 }, { "epoch": 0.2253299192436478, "grad_norm": 2.014338970184326, "learning_rate": 0.00018064869922045852, "loss": 1.4688, "step": 6292 }, { "epoch": 0.2253657313732161, "grad_norm": 2.711336851119995, "learning_rate": 0.00018064184074028915, "loss": 1.5906, "step": 6293 }, { "epoch": 0.22540154350278438, "grad_norm": 1.8387864828109741, "learning_rate": 0.0001806349811751878, "loss": 1.6775, "step": 6294 }, { "epoch": 0.22543735563235268, "grad_norm": 1.484028935432434, "learning_rate": 0.00018062812052524683, "loss": 1.4705, "step": 6295 }, { "epoch": 0.22547316776192097, "grad_norm": 1.9155514240264893, "learning_rate": 0.00018062125879055846, "loss": 1.6196, "step": 6296 }, { "epoch": 0.22550897989148924, "grad_norm": 2.07671856880188, "learning_rate": 0.00018061439597121508, "loss": 1.6635, "step": 6297 }, { "epoch": 0.22554479202105754, "grad_norm": 1.2981393337249756, "learning_rate": 0.000180607532067309, "loss": 1.3212, "step": 6298 }, { "epoch": 0.2255806041506258, "grad_norm": 1.5452812910079956, "learning_rate": 0.0001806006670789325, "loss": 1.7199, "step": 6299 }, { "epoch": 0.2256164162801941, "grad_norm": 2.042917251586914, "learning_rate": 0.00018059380100617802, "loss": 1.561, "step": 6300 }, { "epoch": 0.22565222840976237, "grad_norm": 2.002063035964966, "learning_rate": 0.0001805869338491379, "loss": 1.4581, "step": 6301 }, { "epoch": 0.22568804053933067, "grad_norm": 1.6189521551132202, "learning_rate": 0.00018058006560790453, "loss": 1.2429, "step": 6302 }, { "epoch": 0.22572385266889897, "grad_norm": 1.7002862691879272, "learning_rate": 0.00018057319628257034, "loss": 1.6999, "step": 6303 }, { "epoch": 0.22575966479846724, "grad_norm": 1.913028597831726, "learning_rate": 0.0001805663258732277, "loss": 1.9182, "step": 6304 }, { "epoch": 0.22579547692803553, "grad_norm": 1.3990799188613892, "learning_rate": 0.0001805594543799691, "loss": 1.6439, "step": 6305 }, { "epoch": 0.2258312890576038, "grad_norm": 1.9262644052505493, "learning_rate": 0.00018055258180288696, "loss": 1.6578, "step": 6306 }, { "epoch": 0.2258671011871721, "grad_norm": 1.1598010063171387, "learning_rate": 0.0001805457081420737, "loss": 1.645, "step": 6307 }, { "epoch": 0.22590291331674037, "grad_norm": 2.582381248474121, "learning_rate": 0.00018053883339762183, "loss": 1.9006, "step": 6308 }, { "epoch": 0.22593872544630866, "grad_norm": 1.5848309993743896, "learning_rate": 0.00018053195756962388, "loss": 1.4253, "step": 6309 }, { "epoch": 0.22597453757587696, "grad_norm": 1.6140457391738892, "learning_rate": 0.0001805250806581723, "loss": 1.3751, "step": 6310 }, { "epoch": 0.22601034970544523, "grad_norm": 1.7949855327606201, "learning_rate": 0.00018051820266335963, "loss": 1.4448, "step": 6311 }, { "epoch": 0.22604616183501353, "grad_norm": 1.4919768571853638, "learning_rate": 0.0001805113235852784, "loss": 1.4016, "step": 6312 }, { "epoch": 0.2260819739645818, "grad_norm": 1.2824842929840088, "learning_rate": 0.00018050444342402114, "loss": 1.3802, "step": 6313 }, { "epoch": 0.2261177860941501, "grad_norm": 1.514653205871582, "learning_rate": 0.0001804975621796805, "loss": 1.4011, "step": 6314 }, { "epoch": 0.22615359822371836, "grad_norm": 1.4949651956558228, "learning_rate": 0.00018049067985234895, "loss": 1.3139, "step": 6315 }, { "epoch": 0.22618941035328666, "grad_norm": 1.9180841445922852, "learning_rate": 0.00018048379644211915, "loss": 1.4127, "step": 6316 }, { "epoch": 0.22622522248285495, "grad_norm": 1.3751518726348877, "learning_rate": 0.00018047691194908368, "loss": 1.4633, "step": 6317 }, { "epoch": 0.22626103461242322, "grad_norm": 2.1771159172058105, "learning_rate": 0.00018047002637333517, "loss": 1.6793, "step": 6318 }, { "epoch": 0.22629684674199152, "grad_norm": 1.4497225284576416, "learning_rate": 0.00018046313971496622, "loss": 1.4479, "step": 6319 }, { "epoch": 0.2263326588715598, "grad_norm": 1.9885940551757812, "learning_rate": 0.00018045625197406957, "loss": 1.4871, "step": 6320 }, { "epoch": 0.22636847100112809, "grad_norm": 1.246687412261963, "learning_rate": 0.00018044936315073779, "loss": 1.6281, "step": 6321 }, { "epoch": 0.22640428313069635, "grad_norm": 3.3700385093688965, "learning_rate": 0.0001804424732450636, "loss": 1.9536, "step": 6322 }, { "epoch": 0.22644009526026465, "grad_norm": 1.9437425136566162, "learning_rate": 0.0001804355822571397, "loss": 1.7087, "step": 6323 }, { "epoch": 0.22647590738983295, "grad_norm": 1.4002232551574707, "learning_rate": 0.00018042869018705882, "loss": 1.37, "step": 6324 }, { "epoch": 0.22651171951940122, "grad_norm": 1.4852545261383057, "learning_rate": 0.00018042179703491365, "loss": 1.4754, "step": 6325 }, { "epoch": 0.2265475316489695, "grad_norm": 1.689438819885254, "learning_rate": 0.00018041490280079693, "loss": 1.6379, "step": 6326 }, { "epoch": 0.22658334377853778, "grad_norm": 1.4079216718673706, "learning_rate": 0.00018040800748480142, "loss": 1.4282, "step": 6327 }, { "epoch": 0.22661915590810608, "grad_norm": 1.5791819095611572, "learning_rate": 0.00018040111108701988, "loss": 1.7066, "step": 6328 }, { "epoch": 0.22665496803767435, "grad_norm": 1.8294531106948853, "learning_rate": 0.00018039421360754513, "loss": 1.7546, "step": 6329 }, { "epoch": 0.22669078016724264, "grad_norm": 1.7738605737686157, "learning_rate": 0.0001803873150464699, "loss": 1.3616, "step": 6330 }, { "epoch": 0.22672659229681094, "grad_norm": 1.9782477617263794, "learning_rate": 0.00018038041540388705, "loss": 1.3159, "step": 6331 }, { "epoch": 0.2267624044263792, "grad_norm": 1.4635730981826782, "learning_rate": 0.00018037351467988942, "loss": 1.4511, "step": 6332 }, { "epoch": 0.2267982165559475, "grad_norm": 1.2722338438034058, "learning_rate": 0.0001803666128745698, "loss": 1.2824, "step": 6333 }, { "epoch": 0.22683402868551578, "grad_norm": 1.7284460067749023, "learning_rate": 0.00018035970998802106, "loss": 1.5667, "step": 6334 }, { "epoch": 0.22686984081508407, "grad_norm": 2.6332521438598633, "learning_rate": 0.0001803528060203361, "loss": 1.4429, "step": 6335 }, { "epoch": 0.22690565294465234, "grad_norm": 1.378933310508728, "learning_rate": 0.00018034590097160778, "loss": 1.5293, "step": 6336 }, { "epoch": 0.22694146507422064, "grad_norm": 1.7138888835906982, "learning_rate": 0.000180338994841929, "loss": 1.4708, "step": 6337 }, { "epoch": 0.22697727720378894, "grad_norm": 1.4144269227981567, "learning_rate": 0.00018033208763139266, "loss": 1.578, "step": 6338 }, { "epoch": 0.2270130893333572, "grad_norm": 1.4831655025482178, "learning_rate": 0.0001803251793400917, "loss": 1.6398, "step": 6339 }, { "epoch": 0.2270489014629255, "grad_norm": 1.3129905462265015, "learning_rate": 0.0001803182699681191, "loss": 1.4506, "step": 6340 }, { "epoch": 0.22708471359249377, "grad_norm": 1.9662981033325195, "learning_rate": 0.00018031135951556774, "loss": 1.3558, "step": 6341 }, { "epoch": 0.22712052572206207, "grad_norm": 2.3339595794677734, "learning_rate": 0.00018030444798253066, "loss": 1.4351, "step": 6342 }, { "epoch": 0.22715633785163034, "grad_norm": 1.4761534929275513, "learning_rate": 0.0001802975353691008, "loss": 1.6159, "step": 6343 }, { "epoch": 0.22719214998119863, "grad_norm": 1.3196064233779907, "learning_rate": 0.00018029062167537117, "loss": 1.6138, "step": 6344 }, { "epoch": 0.22722796211076693, "grad_norm": 2.0419766902923584, "learning_rate": 0.00018028370690143482, "loss": 1.8339, "step": 6345 }, { "epoch": 0.2272637742403352, "grad_norm": 1.5514767169952393, "learning_rate": 0.00018027679104738473, "loss": 1.3189, "step": 6346 }, { "epoch": 0.2272995863699035, "grad_norm": 2.7501816749572754, "learning_rate": 0.00018026987411331398, "loss": 1.8378, "step": 6347 }, { "epoch": 0.22733539849947176, "grad_norm": 1.7505801916122437, "learning_rate": 0.0001802629560993156, "loss": 1.1969, "step": 6348 }, { "epoch": 0.22737121062904006, "grad_norm": 1.4281238317489624, "learning_rate": 0.0001802560370054827, "loss": 1.78, "step": 6349 }, { "epoch": 0.22740702275860833, "grad_norm": 1.6368708610534668, "learning_rate": 0.00018024911683190833, "loss": 1.3031, "step": 6350 }, { "epoch": 0.22744283488817663, "grad_norm": 1.8474522829055786, "learning_rate": 0.0001802421955786856, "loss": 1.8179, "step": 6351 }, { "epoch": 0.22747864701774492, "grad_norm": 1.4447168111801147, "learning_rate": 0.00018023527324590764, "loss": 1.4314, "step": 6352 }, { "epoch": 0.2275144591473132, "grad_norm": 1.9680529832839966, "learning_rate": 0.0001802283498336676, "loss": 1.5552, "step": 6353 }, { "epoch": 0.2275502712768815, "grad_norm": 1.547568917274475, "learning_rate": 0.00018022142534205858, "loss": 1.5545, "step": 6354 }, { "epoch": 0.22758608340644976, "grad_norm": 1.4191595315933228, "learning_rate": 0.00018021449977117374, "loss": 1.359, "step": 6355 }, { "epoch": 0.22762189553601805, "grad_norm": 1.7385724782943726, "learning_rate": 0.00018020757312110628, "loss": 1.6338, "step": 6356 }, { "epoch": 0.22765770766558632, "grad_norm": 1.5614619255065918, "learning_rate": 0.0001802006453919494, "loss": 1.7259, "step": 6357 }, { "epoch": 0.22769351979515462, "grad_norm": 1.4145011901855469, "learning_rate": 0.0001801937165837963, "loss": 1.7155, "step": 6358 }, { "epoch": 0.22772933192472292, "grad_norm": 1.572713017463684, "learning_rate": 0.00018018678669674015, "loss": 1.7646, "step": 6359 }, { "epoch": 0.22776514405429119, "grad_norm": 1.4809653759002686, "learning_rate": 0.00018017985573087425, "loss": 1.4841, "step": 6360 }, { "epoch": 0.22780095618385948, "grad_norm": 2.632725715637207, "learning_rate": 0.0001801729236862918, "loss": 1.9187, "step": 6361 }, { "epoch": 0.22783676831342775, "grad_norm": 1.5056543350219727, "learning_rate": 0.0001801659905630861, "loss": 1.6402, "step": 6362 }, { "epoch": 0.22787258044299605, "grad_norm": 1.3668396472930908, "learning_rate": 0.00018015905636135037, "loss": 1.4198, "step": 6363 }, { "epoch": 0.22790839257256432, "grad_norm": 1.650078296661377, "learning_rate": 0.00018015212108117793, "loss": 1.8745, "step": 6364 }, { "epoch": 0.2279442047021326, "grad_norm": 2.297715902328491, "learning_rate": 0.0001801451847226621, "loss": 1.8726, "step": 6365 }, { "epoch": 0.2279800168317009, "grad_norm": 1.448528528213501, "learning_rate": 0.0001801382472858962, "loss": 1.5484, "step": 6366 }, { "epoch": 0.22801582896126918, "grad_norm": 1.730246901512146, "learning_rate": 0.00018013130877097357, "loss": 1.4851, "step": 6367 }, { "epoch": 0.22805164109083748, "grad_norm": 1.6730163097381592, "learning_rate": 0.0001801243691779875, "loss": 1.522, "step": 6368 }, { "epoch": 0.22808745322040574, "grad_norm": 1.6910569667816162, "learning_rate": 0.00018011742850703146, "loss": 1.4104, "step": 6369 }, { "epoch": 0.22812326534997404, "grad_norm": 1.7938308715820312, "learning_rate": 0.00018011048675819872, "loss": 1.4716, "step": 6370 }, { "epoch": 0.2281590774795423, "grad_norm": 1.5565799474716187, "learning_rate": 0.0001801035439315827, "loss": 1.5583, "step": 6371 }, { "epoch": 0.2281948896091106, "grad_norm": 1.8678364753723145, "learning_rate": 0.00018009660002727684, "loss": 1.5625, "step": 6372 }, { "epoch": 0.2282307017386789, "grad_norm": 1.3503905534744263, "learning_rate": 0.00018008965504537455, "loss": 1.5808, "step": 6373 }, { "epoch": 0.22826651386824717, "grad_norm": 1.5878171920776367, "learning_rate": 0.00018008270898596927, "loss": 1.7811, "step": 6374 }, { "epoch": 0.22830232599781547, "grad_norm": 1.4853515625, "learning_rate": 0.00018007576184915443, "loss": 1.5756, "step": 6375 }, { "epoch": 0.22833813812738374, "grad_norm": 1.5101004838943481, "learning_rate": 0.00018006881363502348, "loss": 1.3801, "step": 6376 }, { "epoch": 0.22837395025695204, "grad_norm": 1.6938331127166748, "learning_rate": 0.00018006186434366996, "loss": 1.7725, "step": 6377 }, { "epoch": 0.2284097623865203, "grad_norm": 2.058910846710205, "learning_rate": 0.0001800549139751873, "loss": 1.5681, "step": 6378 }, { "epoch": 0.2284455745160886, "grad_norm": 1.5402759313583374, "learning_rate": 0.00018004796252966908, "loss": 1.4617, "step": 6379 }, { "epoch": 0.2284813866456569, "grad_norm": 1.5306357145309448, "learning_rate": 0.00018004101000720872, "loss": 1.7913, "step": 6380 }, { "epoch": 0.22851719877522517, "grad_norm": 1.2929790019989014, "learning_rate": 0.00018003405640789987, "loss": 1.553, "step": 6381 }, { "epoch": 0.22855301090479346, "grad_norm": 2.393298387527466, "learning_rate": 0.00018002710173183596, "loss": 1.6883, "step": 6382 }, { "epoch": 0.22858882303436173, "grad_norm": 2.0982320308685303, "learning_rate": 0.00018002014597911066, "loss": 1.6216, "step": 6383 }, { "epoch": 0.22862463516393003, "grad_norm": 1.3045873641967773, "learning_rate": 0.00018001318914981753, "loss": 1.4899, "step": 6384 }, { "epoch": 0.2286604472934983, "grad_norm": 2.3285608291625977, "learning_rate": 0.00018000623124405014, "loss": 1.4443, "step": 6385 }, { "epoch": 0.2286962594230666, "grad_norm": 1.8748106956481934, "learning_rate": 0.0001799992722619021, "loss": 1.2426, "step": 6386 }, { "epoch": 0.22873207155263486, "grad_norm": 1.3914124965667725, "learning_rate": 0.00017999231220346707, "loss": 1.5566, "step": 6387 }, { "epoch": 0.22876788368220316, "grad_norm": 1.7344447374343872, "learning_rate": 0.00017998535106883862, "loss": 1.052, "step": 6388 }, { "epoch": 0.22880369581177146, "grad_norm": 1.5599722862243652, "learning_rate": 0.00017997838885811047, "loss": 1.6529, "step": 6389 }, { "epoch": 0.22883950794133973, "grad_norm": 2.0522711277008057, "learning_rate": 0.00017997142557137625, "loss": 1.4925, "step": 6390 }, { "epoch": 0.22887532007090802, "grad_norm": 1.712795376777649, "learning_rate": 0.00017996446120872967, "loss": 1.7085, "step": 6391 }, { "epoch": 0.2289111322004763, "grad_norm": 1.9475706815719604, "learning_rate": 0.00017995749577026443, "loss": 1.6277, "step": 6392 }, { "epoch": 0.2289469443300446, "grad_norm": 1.562954068183899, "learning_rate": 0.0001799505292560742, "loss": 1.8237, "step": 6393 }, { "epoch": 0.22898275645961286, "grad_norm": 1.4970970153808594, "learning_rate": 0.00017994356166625271, "loss": 1.6119, "step": 6394 }, { "epoch": 0.22901856858918115, "grad_norm": 1.8240835666656494, "learning_rate": 0.0001799365930008937, "loss": 1.7235, "step": 6395 }, { "epoch": 0.22905438071874945, "grad_norm": 1.410115361213684, "learning_rate": 0.000179929623260091, "loss": 1.429, "step": 6396 }, { "epoch": 0.22909019284831772, "grad_norm": 1.732579231262207, "learning_rate": 0.0001799226524439383, "loss": 1.6388, "step": 6397 }, { "epoch": 0.22912600497788602, "grad_norm": 2.0173988342285156, "learning_rate": 0.0001799156805525294, "loss": 1.6149, "step": 6398 }, { "epoch": 0.22916181710745429, "grad_norm": 1.4980366230010986, "learning_rate": 0.00017990870758595811, "loss": 1.5596, "step": 6399 }, { "epoch": 0.22919762923702258, "grad_norm": 2.1325318813323975, "learning_rate": 0.0001799017335443182, "loss": 1.5557, "step": 6400 }, { "epoch": 0.22923344136659085, "grad_norm": 1.7884522676467896, "learning_rate": 0.00017989475842770358, "loss": 1.6517, "step": 6401 }, { "epoch": 0.22926925349615915, "grad_norm": 2.133835554122925, "learning_rate": 0.00017988778223620799, "loss": 1.7446, "step": 6402 }, { "epoch": 0.22930506562572744, "grad_norm": 1.8718260526657104, "learning_rate": 0.0001798808049699254, "loss": 1.7916, "step": 6403 }, { "epoch": 0.2293408777552957, "grad_norm": 1.6703238487243652, "learning_rate": 0.00017987382662894955, "loss": 1.5125, "step": 6404 }, { "epoch": 0.229376689884864, "grad_norm": 1.7600868940353394, "learning_rate": 0.00017986684721337442, "loss": 1.5224, "step": 6405 }, { "epoch": 0.22941250201443228, "grad_norm": 2.184349298477173, "learning_rate": 0.00017985986672329392, "loss": 1.5551, "step": 6406 }, { "epoch": 0.22944831414400058, "grad_norm": 1.4052538871765137, "learning_rate": 0.0001798528851588019, "loss": 1.7645, "step": 6407 }, { "epoch": 0.22948412627356884, "grad_norm": 1.7552518844604492, "learning_rate": 0.0001798459025199923, "loss": 1.5787, "step": 6408 }, { "epoch": 0.22951993840313714, "grad_norm": 1.7903473377227783, "learning_rate": 0.00017983891880695907, "loss": 1.6544, "step": 6409 }, { "epoch": 0.22955575053270544, "grad_norm": 1.63605797290802, "learning_rate": 0.00017983193401979616, "loss": 1.7336, "step": 6410 }, { "epoch": 0.2295915626622737, "grad_norm": 1.7833870649337769, "learning_rate": 0.0001798249481585976, "loss": 1.425, "step": 6411 }, { "epoch": 0.229627374791842, "grad_norm": 1.5450332164764404, "learning_rate": 0.0001798179612234573, "loss": 1.3434, "step": 6412 }, { "epoch": 0.22966318692141027, "grad_norm": 1.3541749715805054, "learning_rate": 0.0001798109732144693, "loss": 1.5277, "step": 6413 }, { "epoch": 0.22969899905097857, "grad_norm": 2.7870755195617676, "learning_rate": 0.0001798039841317276, "loss": 1.5438, "step": 6414 }, { "epoch": 0.22973481118054684, "grad_norm": 1.9439632892608643, "learning_rate": 0.00017979699397532625, "loss": 1.6845, "step": 6415 }, { "epoch": 0.22977062331011514, "grad_norm": 1.472992181777954, "learning_rate": 0.00017979000274535926, "loss": 1.59, "step": 6416 }, { "epoch": 0.22980643543968343, "grad_norm": 1.9579699039459229, "learning_rate": 0.0001797830104419207, "loss": 1.3821, "step": 6417 }, { "epoch": 0.2298422475692517, "grad_norm": 1.2323920726776123, "learning_rate": 0.00017977601706510465, "loss": 1.5868, "step": 6418 }, { "epoch": 0.22987805969882, "grad_norm": 1.9308135509490967, "learning_rate": 0.0001797690226150052, "loss": 1.5732, "step": 6419 }, { "epoch": 0.22991387182838827, "grad_norm": 1.283857822418213, "learning_rate": 0.00017976202709171643, "loss": 1.3582, "step": 6420 }, { "epoch": 0.22994968395795656, "grad_norm": 1.7926145792007446, "learning_rate": 0.0001797550304953325, "loss": 1.542, "step": 6421 }, { "epoch": 0.22998549608752483, "grad_norm": 2.011465072631836, "learning_rate": 0.00017974803282594747, "loss": 1.3821, "step": 6422 }, { "epoch": 0.23002130821709313, "grad_norm": 1.5812910795211792, "learning_rate": 0.00017974103408365557, "loss": 1.5659, "step": 6423 }, { "epoch": 0.23005712034666143, "grad_norm": 1.8776155710220337, "learning_rate": 0.00017973403426855088, "loss": 1.7368, "step": 6424 }, { "epoch": 0.2300929324762297, "grad_norm": 1.4800854921340942, "learning_rate": 0.00017972703338072762, "loss": 1.6797, "step": 6425 }, { "epoch": 0.230128744605798, "grad_norm": 1.9689767360687256, "learning_rate": 0.00017972003142027997, "loss": 2.1101, "step": 6426 }, { "epoch": 0.23016455673536626, "grad_norm": 1.2748472690582275, "learning_rate": 0.00017971302838730213, "loss": 1.5971, "step": 6427 }, { "epoch": 0.23020036886493456, "grad_norm": 1.3918734788894653, "learning_rate": 0.00017970602428188834, "loss": 1.4222, "step": 6428 }, { "epoch": 0.23023618099450283, "grad_norm": 1.911766529083252, "learning_rate": 0.00017969901910413276, "loss": 1.3167, "step": 6429 }, { "epoch": 0.23027199312407112, "grad_norm": 1.62160325050354, "learning_rate": 0.00017969201285412972, "loss": 1.6437, "step": 6430 }, { "epoch": 0.23030780525363942, "grad_norm": 1.9006664752960205, "learning_rate": 0.0001796850055319734, "loss": 1.3403, "step": 6431 }, { "epoch": 0.2303436173832077, "grad_norm": 3.353529691696167, "learning_rate": 0.00017967799713775815, "loss": 1.7944, "step": 6432 }, { "epoch": 0.23037942951277599, "grad_norm": 2.1918132305145264, "learning_rate": 0.00017967098767157822, "loss": 1.7478, "step": 6433 }, { "epoch": 0.23041524164234425, "grad_norm": 1.9099441766738892, "learning_rate": 0.00017966397713352792, "loss": 1.2382, "step": 6434 }, { "epoch": 0.23045105377191255, "grad_norm": 1.6920967102050781, "learning_rate": 0.00017965696552370156, "loss": 1.4024, "step": 6435 }, { "epoch": 0.23048686590148082, "grad_norm": 1.8513035774230957, "learning_rate": 0.00017964995284219348, "loss": 1.4771, "step": 6436 }, { "epoch": 0.23052267803104912, "grad_norm": 2.202528476715088, "learning_rate": 0.00017964293908909803, "loss": 1.9447, "step": 6437 }, { "epoch": 0.2305584901606174, "grad_norm": 1.4865727424621582, "learning_rate": 0.00017963592426450956, "loss": 1.4472, "step": 6438 }, { "epoch": 0.23059430229018568, "grad_norm": 1.3838335275650024, "learning_rate": 0.0001796289083685225, "loss": 1.7319, "step": 6439 }, { "epoch": 0.23063011441975398, "grad_norm": 1.7137506008148193, "learning_rate": 0.0001796218914012311, "loss": 1.6542, "step": 6440 }, { "epoch": 0.23066592654932225, "grad_norm": 1.3480849266052246, "learning_rate": 0.0001796148733627299, "loss": 1.6549, "step": 6441 }, { "epoch": 0.23070173867889054, "grad_norm": 1.6950236558914185, "learning_rate": 0.00017960785425311332, "loss": 1.503, "step": 6442 }, { "epoch": 0.2307375508084588, "grad_norm": 2.1359996795654297, "learning_rate": 0.0001796008340724757, "loss": 1.5654, "step": 6443 }, { "epoch": 0.2307733629380271, "grad_norm": 1.839069128036499, "learning_rate": 0.00017959381282091152, "loss": 1.5582, "step": 6444 }, { "epoch": 0.2308091750675954, "grad_norm": 2.33095383644104, "learning_rate": 0.0001795867904985153, "loss": 1.7936, "step": 6445 }, { "epoch": 0.23084498719716368, "grad_norm": 1.4011579751968384, "learning_rate": 0.00017957976710538144, "loss": 1.7156, "step": 6446 }, { "epoch": 0.23088079932673197, "grad_norm": 1.6660329103469849, "learning_rate": 0.00017957274264160448, "loss": 1.8214, "step": 6447 }, { "epoch": 0.23091661145630024, "grad_norm": 1.4621220827102661, "learning_rate": 0.0001795657171072789, "loss": 1.6597, "step": 6448 }, { "epoch": 0.23095242358586854, "grad_norm": 1.8586870431900024, "learning_rate": 0.00017955869050249925, "loss": 2.0127, "step": 6449 }, { "epoch": 0.2309882357154368, "grad_norm": 1.8170366287231445, "learning_rate": 0.00017955166282736002, "loss": 1.6028, "step": 6450 }, { "epoch": 0.2310240478450051, "grad_norm": 1.4119073152542114, "learning_rate": 0.0001795446340819558, "loss": 1.1546, "step": 6451 }, { "epoch": 0.2310598599745734, "grad_norm": 1.4222018718719482, "learning_rate": 0.0001795376042663811, "loss": 1.3849, "step": 6452 }, { "epoch": 0.23109567210414167, "grad_norm": 1.6756181716918945, "learning_rate": 0.00017953057338073055, "loss": 1.6725, "step": 6453 }, { "epoch": 0.23113148423370997, "grad_norm": 1.8242745399475098, "learning_rate": 0.00017952354142509872, "loss": 1.5498, "step": 6454 }, { "epoch": 0.23116729636327824, "grad_norm": 1.50627601146698, "learning_rate": 0.00017951650839958023, "loss": 1.4107, "step": 6455 }, { "epoch": 0.23120310849284653, "grad_norm": 1.5063188076019287, "learning_rate": 0.0001795094743042697, "loss": 1.5205, "step": 6456 }, { "epoch": 0.2312389206224148, "grad_norm": 1.6867979764938354, "learning_rate": 0.00017950243913926171, "loss": 1.4739, "step": 6457 }, { "epoch": 0.2312747327519831, "grad_norm": 2.8445029258728027, "learning_rate": 0.000179495402904651, "loss": 1.8396, "step": 6458 }, { "epoch": 0.2313105448815514, "grad_norm": 1.4332222938537598, "learning_rate": 0.00017948836560053216, "loss": 1.3423, "step": 6459 }, { "epoch": 0.23134635701111966, "grad_norm": 1.8352477550506592, "learning_rate": 0.00017948132722699992, "loss": 1.7802, "step": 6460 }, { "epoch": 0.23138216914068796, "grad_norm": 1.4083185195922852, "learning_rate": 0.0001794742877841489, "loss": 1.6408, "step": 6461 }, { "epoch": 0.23141798127025623, "grad_norm": 1.7609177827835083, "learning_rate": 0.00017946724727207388, "loss": 1.3707, "step": 6462 }, { "epoch": 0.23145379339982453, "grad_norm": 2.6420741081237793, "learning_rate": 0.00017946020569086955, "loss": 1.2318, "step": 6463 }, { "epoch": 0.2314896055293928, "grad_norm": 1.4578214883804321, "learning_rate": 0.00017945316304063066, "loss": 1.3948, "step": 6464 }, { "epoch": 0.2315254176589611, "grad_norm": 1.539219617843628, "learning_rate": 0.00017944611932145193, "loss": 1.5818, "step": 6465 }, { "epoch": 0.2315612297885294, "grad_norm": 2.507035493850708, "learning_rate": 0.0001794390745334281, "loss": 1.1414, "step": 6466 }, { "epoch": 0.23159704191809766, "grad_norm": 2.5263805389404297, "learning_rate": 0.00017943202867665408, "loss": 1.7751, "step": 6467 }, { "epoch": 0.23163285404766595, "grad_norm": 1.9388482570648193, "learning_rate": 0.00017942498175122453, "loss": 1.9844, "step": 6468 }, { "epoch": 0.23166866617723422, "grad_norm": 1.8652827739715576, "learning_rate": 0.0001794179337572343, "loss": 1.4795, "step": 6469 }, { "epoch": 0.23170447830680252, "grad_norm": 1.9985748529434204, "learning_rate": 0.0001794108846947782, "loss": 1.7811, "step": 6470 }, { "epoch": 0.2317402904363708, "grad_norm": 2.3072433471679688, "learning_rate": 0.00017940383456395109, "loss": 1.4727, "step": 6471 }, { "epoch": 0.23177610256593908, "grad_norm": 1.2530908584594727, "learning_rate": 0.00017939678336484783, "loss": 1.459, "step": 6472 }, { "epoch": 0.23181191469550738, "grad_norm": 1.7884855270385742, "learning_rate": 0.00017938973109756323, "loss": 1.7551, "step": 6473 }, { "epoch": 0.23184772682507565, "grad_norm": 2.037320613861084, "learning_rate": 0.00017938267776219225, "loss": 1.684, "step": 6474 }, { "epoch": 0.23188353895464395, "grad_norm": 1.3687164783477783, "learning_rate": 0.00017937562335882968, "loss": 1.7375, "step": 6475 }, { "epoch": 0.23191935108421222, "grad_norm": 1.592024564743042, "learning_rate": 0.00017936856788757055, "loss": 1.5635, "step": 6476 }, { "epoch": 0.2319551632137805, "grad_norm": 1.433759331703186, "learning_rate": 0.00017936151134850966, "loss": 1.6694, "step": 6477 }, { "epoch": 0.23199097534334878, "grad_norm": 2.6087303161621094, "learning_rate": 0.00017935445374174202, "loss": 1.5391, "step": 6478 }, { "epoch": 0.23202678747291708, "grad_norm": 1.7297744750976562, "learning_rate": 0.0001793473950673626, "loss": 1.6245, "step": 6479 }, { "epoch": 0.23206259960248538, "grad_norm": 1.6509273052215576, "learning_rate": 0.00017934033532546632, "loss": 1.6673, "step": 6480 }, { "epoch": 0.23209841173205364, "grad_norm": 1.5647467374801636, "learning_rate": 0.00017933327451614812, "loss": 1.6559, "step": 6481 }, { "epoch": 0.23213422386162194, "grad_norm": 1.4652289152145386, "learning_rate": 0.0001793262126395031, "loss": 1.5293, "step": 6482 }, { "epoch": 0.2321700359911902, "grad_norm": 1.4504691362380981, "learning_rate": 0.00017931914969562617, "loss": 1.5716, "step": 6483 }, { "epoch": 0.2322058481207585, "grad_norm": 1.9844943284988403, "learning_rate": 0.0001793120856846124, "loss": 1.6779, "step": 6484 }, { "epoch": 0.23224166025032678, "grad_norm": 1.2851874828338623, "learning_rate": 0.00017930502060655682, "loss": 1.4328, "step": 6485 }, { "epoch": 0.23227747237989507, "grad_norm": 1.9430853128433228, "learning_rate": 0.0001792979544615545, "loss": 1.7059, "step": 6486 }, { "epoch": 0.23231328450946334, "grad_norm": 1.595819115638733, "learning_rate": 0.00017929088724970052, "loss": 1.567, "step": 6487 }, { "epoch": 0.23234909663903164, "grad_norm": 2.0435125827789307, "learning_rate": 0.0001792838189710899, "loss": 1.8018, "step": 6488 }, { "epoch": 0.23238490876859993, "grad_norm": 1.225368618965149, "learning_rate": 0.00017927674962581774, "loss": 1.3603, "step": 6489 }, { "epoch": 0.2324207208981682, "grad_norm": 1.9997037649154663, "learning_rate": 0.0001792696792139792, "loss": 1.4549, "step": 6490 }, { "epoch": 0.2324565330277365, "grad_norm": 1.4556210041046143, "learning_rate": 0.0001792626077356694, "loss": 1.5237, "step": 6491 }, { "epoch": 0.23249234515730477, "grad_norm": 1.36371648311615, "learning_rate": 0.0001792555351909834, "loss": 1.7727, "step": 6492 }, { "epoch": 0.23252815728687307, "grad_norm": 1.272359848022461, "learning_rate": 0.0001792484615800164, "loss": 1.4476, "step": 6493 }, { "epoch": 0.23256396941644134, "grad_norm": 1.8688451051712036, "learning_rate": 0.00017924138690286366, "loss": 1.6197, "step": 6494 }, { "epoch": 0.23259978154600963, "grad_norm": 1.4307365417480469, "learning_rate": 0.0001792343111596202, "loss": 1.5767, "step": 6495 }, { "epoch": 0.23263559367557793, "grad_norm": 1.3361865282058716, "learning_rate": 0.00017922723435038131, "loss": 1.2674, "step": 6496 }, { "epoch": 0.2326714058051462, "grad_norm": 1.858230710029602, "learning_rate": 0.00017922015647524217, "loss": 1.6375, "step": 6497 }, { "epoch": 0.2327072179347145, "grad_norm": 1.5478800535202026, "learning_rate": 0.00017921307753429803, "loss": 1.2073, "step": 6498 }, { "epoch": 0.23274303006428276, "grad_norm": 1.2986811399459839, "learning_rate": 0.00017920599752764408, "loss": 1.2458, "step": 6499 }, { "epoch": 0.23277884219385106, "grad_norm": 1.7942970991134644, "learning_rate": 0.00017919891645537563, "loss": 1.5128, "step": 6500 }, { "epoch": 0.23281465432341933, "grad_norm": 2.231881618499756, "learning_rate": 0.0001791918343175879, "loss": 1.7035, "step": 6501 }, { "epoch": 0.23285046645298763, "grad_norm": 1.8615409135818481, "learning_rate": 0.0001791847511143762, "loss": 1.269, "step": 6502 }, { "epoch": 0.23288627858255592, "grad_norm": 1.8193210363388062, "learning_rate": 0.0001791776668458358, "loss": 1.7327, "step": 6503 }, { "epoch": 0.2329220907121242, "grad_norm": 1.4053817987442017, "learning_rate": 0.00017917058151206204, "loss": 1.6057, "step": 6504 }, { "epoch": 0.2329579028416925, "grad_norm": 1.530077576637268, "learning_rate": 0.00017916349511315022, "loss": 1.6364, "step": 6505 }, { "epoch": 0.23299371497126076, "grad_norm": 1.6833380460739136, "learning_rate": 0.0001791564076491957, "loss": 1.5841, "step": 6506 }, { "epoch": 0.23302952710082905, "grad_norm": 1.6770578622817993, "learning_rate": 0.0001791493191202938, "loss": 1.8875, "step": 6507 }, { "epoch": 0.23306533923039732, "grad_norm": 2.6495304107666016, "learning_rate": 0.00017914222952653992, "loss": 1.6587, "step": 6508 }, { "epoch": 0.23310115135996562, "grad_norm": 1.6221790313720703, "learning_rate": 0.00017913513886802943, "loss": 1.5729, "step": 6509 }, { "epoch": 0.23313696348953392, "grad_norm": 1.6502245664596558, "learning_rate": 0.0001791280471448577, "loss": 1.6398, "step": 6510 }, { "epoch": 0.23317277561910218, "grad_norm": 1.5571110248565674, "learning_rate": 0.00017912095435712017, "loss": 1.515, "step": 6511 }, { "epoch": 0.23320858774867048, "grad_norm": 1.4226124286651611, "learning_rate": 0.0001791138605049123, "loss": 1.7522, "step": 6512 }, { "epoch": 0.23324439987823875, "grad_norm": 1.4621069431304932, "learning_rate": 0.00017910676558832944, "loss": 1.6877, "step": 6513 }, { "epoch": 0.23328021200780705, "grad_norm": 2.131011724472046, "learning_rate": 0.0001790996696074671, "loss": 1.6325, "step": 6514 }, { "epoch": 0.23331602413737532, "grad_norm": 1.4561113119125366, "learning_rate": 0.00017909257256242076, "loss": 1.3617, "step": 6515 }, { "epoch": 0.2333518362669436, "grad_norm": 1.3390988111495972, "learning_rate": 0.00017908547445328585, "loss": 1.6789, "step": 6516 }, { "epoch": 0.2333876483965119, "grad_norm": 1.5033286809921265, "learning_rate": 0.0001790783752801579, "loss": 1.547, "step": 6517 }, { "epoch": 0.23342346052608018, "grad_norm": 1.6072916984558105, "learning_rate": 0.00017907127504313241, "loss": 1.3535, "step": 6518 }, { "epoch": 0.23345927265564848, "grad_norm": 1.4692038297653198, "learning_rate": 0.00017906417374230493, "loss": 1.5247, "step": 6519 }, { "epoch": 0.23349508478521674, "grad_norm": 1.9015370607376099, "learning_rate": 0.00017905707137777098, "loss": 1.353, "step": 6520 }, { "epoch": 0.23353089691478504, "grad_norm": 1.8074142932891846, "learning_rate": 0.00017904996794962608, "loss": 1.4386, "step": 6521 }, { "epoch": 0.2335667090443533, "grad_norm": 1.8867095708847046, "learning_rate": 0.00017904286345796582, "loss": 1.5851, "step": 6522 }, { "epoch": 0.2336025211739216, "grad_norm": 1.6819953918457031, "learning_rate": 0.00017903575790288585, "loss": 1.5244, "step": 6523 }, { "epoch": 0.2336383333034899, "grad_norm": 2.2761800289154053, "learning_rate": 0.00017902865128448166, "loss": 1.8374, "step": 6524 }, { "epoch": 0.23367414543305817, "grad_norm": 1.5938763618469238, "learning_rate": 0.00017902154360284893, "loss": 1.3266, "step": 6525 }, { "epoch": 0.23370995756262647, "grad_norm": 1.7620958089828491, "learning_rate": 0.00017901443485808324, "loss": 1.6188, "step": 6526 }, { "epoch": 0.23374576969219474, "grad_norm": 1.395570158958435, "learning_rate": 0.00017900732505028025, "loss": 1.5055, "step": 6527 }, { "epoch": 0.23378158182176303, "grad_norm": 1.467537522315979, "learning_rate": 0.00017900021417953564, "loss": 1.5082, "step": 6528 }, { "epoch": 0.2338173939513313, "grad_norm": 1.5979235172271729, "learning_rate": 0.000178993102245945, "loss": 1.6297, "step": 6529 }, { "epoch": 0.2338532060808996, "grad_norm": 1.9237210750579834, "learning_rate": 0.0001789859892496041, "loss": 1.4545, "step": 6530 }, { "epoch": 0.2338890182104679, "grad_norm": 1.3320746421813965, "learning_rate": 0.00017897887519060862, "loss": 1.3044, "step": 6531 }, { "epoch": 0.23392483034003617, "grad_norm": 1.386905312538147, "learning_rate": 0.0001789717600690542, "loss": 1.6688, "step": 6532 }, { "epoch": 0.23396064246960446, "grad_norm": 1.8010025024414062, "learning_rate": 0.00017896464388503664, "loss": 1.2744, "step": 6533 }, { "epoch": 0.23399645459917273, "grad_norm": 1.6420273780822754, "learning_rate": 0.00017895752663865167, "loss": 1.6643, "step": 6534 }, { "epoch": 0.23403226672874103, "grad_norm": 1.6220277547836304, "learning_rate": 0.00017895040832999502, "loss": 1.6328, "step": 6535 }, { "epoch": 0.2340680788583093, "grad_norm": 1.4307856559753418, "learning_rate": 0.00017894328895916244, "loss": 1.6593, "step": 6536 }, { "epoch": 0.2341038909878776, "grad_norm": 1.9479830265045166, "learning_rate": 0.00017893616852624974, "loss": 1.4733, "step": 6537 }, { "epoch": 0.2341397031174459, "grad_norm": 1.6316691637039185, "learning_rate": 0.00017892904703135272, "loss": 1.6507, "step": 6538 }, { "epoch": 0.23417551524701416, "grad_norm": 1.9461170434951782, "learning_rate": 0.0001789219244745672, "loss": 0.9729, "step": 6539 }, { "epoch": 0.23421132737658246, "grad_norm": 1.3413337469100952, "learning_rate": 0.00017891480085598896, "loss": 1.6239, "step": 6540 }, { "epoch": 0.23424713950615073, "grad_norm": 1.4433578252792358, "learning_rate": 0.00017890767617571388, "loss": 1.4859, "step": 6541 }, { "epoch": 0.23428295163571902, "grad_norm": 1.8441306352615356, "learning_rate": 0.00017890055043383782, "loss": 1.5165, "step": 6542 }, { "epoch": 0.2343187637652873, "grad_norm": 1.71013343334198, "learning_rate": 0.0001788934236304566, "loss": 1.5745, "step": 6543 }, { "epoch": 0.2343545758948556, "grad_norm": 2.440901517868042, "learning_rate": 0.00017888629576566614, "loss": 2.0483, "step": 6544 }, { "epoch": 0.23439038802442388, "grad_norm": 1.9219796657562256, "learning_rate": 0.00017887916683956233, "loss": 1.4539, "step": 6545 }, { "epoch": 0.23442620015399215, "grad_norm": 1.5786051750183105, "learning_rate": 0.0001788720368522411, "loss": 1.3918, "step": 6546 }, { "epoch": 0.23446201228356045, "grad_norm": 1.9648666381835938, "learning_rate": 0.0001788649058037983, "loss": 1.5756, "step": 6547 }, { "epoch": 0.23449782441312872, "grad_norm": 1.5821706056594849, "learning_rate": 0.00017885777369432994, "loss": 1.402, "step": 6548 }, { "epoch": 0.23453363654269702, "grad_norm": 1.3339437246322632, "learning_rate": 0.000178850640523932, "loss": 1.5828, "step": 6549 }, { "epoch": 0.23456944867226528, "grad_norm": 1.375614047050476, "learning_rate": 0.00017884350629270035, "loss": 1.6135, "step": 6550 }, { "epoch": 0.23460526080183358, "grad_norm": 1.6413074731826782, "learning_rate": 0.00017883637100073104, "loss": 1.1794, "step": 6551 }, { "epoch": 0.23464107293140188, "grad_norm": 1.5169514417648315, "learning_rate": 0.00017882923464812006, "loss": 1.1396, "step": 6552 }, { "epoch": 0.23467688506097015, "grad_norm": 1.6196491718292236, "learning_rate": 0.00017882209723496338, "loss": 1.7209, "step": 6553 }, { "epoch": 0.23471269719053844, "grad_norm": 1.7131088972091675, "learning_rate": 0.00017881495876135708, "loss": 1.6069, "step": 6554 }, { "epoch": 0.2347485093201067, "grad_norm": 1.6686679124832153, "learning_rate": 0.00017880781922739717, "loss": 1.9669, "step": 6555 }, { "epoch": 0.234784321449675, "grad_norm": 1.7967392206192017, "learning_rate": 0.0001788006786331797, "loss": 1.5043, "step": 6556 }, { "epoch": 0.23482013357924328, "grad_norm": 1.6955111026763916, "learning_rate": 0.00017879353697880073, "loss": 1.6294, "step": 6557 }, { "epoch": 0.23485594570881158, "grad_norm": 1.5779500007629395, "learning_rate": 0.00017878639426435638, "loss": 1.3588, "step": 6558 }, { "epoch": 0.23489175783837987, "grad_norm": 1.3038164377212524, "learning_rate": 0.00017877925048994273, "loss": 1.5094, "step": 6559 }, { "epoch": 0.23492756996794814, "grad_norm": 1.5466959476470947, "learning_rate": 0.00017877210565565586, "loss": 1.7958, "step": 6560 }, { "epoch": 0.23496338209751644, "grad_norm": 1.521091103553772, "learning_rate": 0.0001787649597615919, "loss": 1.6854, "step": 6561 }, { "epoch": 0.2349991942270847, "grad_norm": 1.5606482028961182, "learning_rate": 0.00017875781280784705, "loss": 1.4973, "step": 6562 }, { "epoch": 0.235035006356653, "grad_norm": 2.0361833572387695, "learning_rate": 0.0001787506647945174, "loss": 1.3534, "step": 6563 }, { "epoch": 0.23507081848622127, "grad_norm": 2.3265650272369385, "learning_rate": 0.00017874351572169913, "loss": 1.3959, "step": 6564 }, { "epoch": 0.23510663061578957, "grad_norm": 1.6580675840377808, "learning_rate": 0.00017873636558948846, "loss": 1.4783, "step": 6565 }, { "epoch": 0.23514244274535787, "grad_norm": 1.493943691253662, "learning_rate": 0.00017872921439798152, "loss": 1.6773, "step": 6566 }, { "epoch": 0.23517825487492613, "grad_norm": 1.4382308721542358, "learning_rate": 0.00017872206214727455, "loss": 1.4732, "step": 6567 }, { "epoch": 0.23521406700449443, "grad_norm": 1.3880226612091064, "learning_rate": 0.0001787149088374638, "loss": 1.462, "step": 6568 }, { "epoch": 0.2352498791340627, "grad_norm": 1.3058699369430542, "learning_rate": 0.00017870775446864547, "loss": 1.5238, "step": 6569 }, { "epoch": 0.235285691263631, "grad_norm": 2.446697235107422, "learning_rate": 0.00017870059904091584, "loss": 1.4423, "step": 6570 }, { "epoch": 0.23532150339319927, "grad_norm": 1.8130626678466797, "learning_rate": 0.00017869344255437117, "loss": 1.5468, "step": 6571 }, { "epoch": 0.23535731552276756, "grad_norm": 1.4712576866149902, "learning_rate": 0.00017868628500910773, "loss": 1.6648, "step": 6572 }, { "epoch": 0.23539312765233586, "grad_norm": 1.2141464948654175, "learning_rate": 0.00017867912640522182, "loss": 1.7291, "step": 6573 }, { "epoch": 0.23542893978190413, "grad_norm": 1.6197760105133057, "learning_rate": 0.00017867196674280976, "loss": 1.9246, "step": 6574 }, { "epoch": 0.23546475191147243, "grad_norm": 1.6172927618026733, "learning_rate": 0.00017866480602196787, "loss": 1.3133, "step": 6575 }, { "epoch": 0.2355005640410407, "grad_norm": 1.7807902097702026, "learning_rate": 0.00017865764424279248, "loss": 1.4431, "step": 6576 }, { "epoch": 0.235536376170609, "grad_norm": 1.82480788230896, "learning_rate": 0.00017865048140537995, "loss": 1.7821, "step": 6577 }, { "epoch": 0.23557218830017726, "grad_norm": 1.434335708618164, "learning_rate": 0.00017864331750982665, "loss": 1.4632, "step": 6578 }, { "epoch": 0.23560800042974556, "grad_norm": 1.5445455312728882, "learning_rate": 0.0001786361525562289, "loss": 1.634, "step": 6579 }, { "epoch": 0.23564381255931385, "grad_norm": 1.4568474292755127, "learning_rate": 0.0001786289865446832, "loss": 1.5565, "step": 6580 }, { "epoch": 0.23567962468888212, "grad_norm": 1.866904377937317, "learning_rate": 0.00017862181947528592, "loss": 1.5269, "step": 6581 }, { "epoch": 0.23571543681845042, "grad_norm": 2.703174352645874, "learning_rate": 0.00017861465134813348, "loss": 1.5168, "step": 6582 }, { "epoch": 0.2357512489480187, "grad_norm": 1.7558021545410156, "learning_rate": 0.00017860748216332227, "loss": 1.4795, "step": 6583 }, { "epoch": 0.23578706107758698, "grad_norm": 1.4444247484207153, "learning_rate": 0.00017860031192094882, "loss": 1.4356, "step": 6584 }, { "epoch": 0.23582287320715525, "grad_norm": 1.3739668130874634, "learning_rate": 0.00017859314062110954, "loss": 1.4568, "step": 6585 }, { "epoch": 0.23585868533672355, "grad_norm": 1.9809393882751465, "learning_rate": 0.00017858596826390093, "loss": 1.6483, "step": 6586 }, { "epoch": 0.23589449746629182, "grad_norm": 1.643008828163147, "learning_rate": 0.0001785787948494195, "loss": 1.7832, "step": 6587 }, { "epoch": 0.23593030959586012, "grad_norm": 1.5805420875549316, "learning_rate": 0.00017857162037776173, "loss": 1.5009, "step": 6588 }, { "epoch": 0.2359661217254284, "grad_norm": 1.6239664554595947, "learning_rate": 0.0001785644448490242, "loss": 1.5902, "step": 6589 }, { "epoch": 0.23600193385499668, "grad_norm": 1.4822232723236084, "learning_rate": 0.00017855726826330334, "loss": 1.5981, "step": 6590 }, { "epoch": 0.23603774598456498, "grad_norm": 1.701465129852295, "learning_rate": 0.00017855009062069582, "loss": 1.4967, "step": 6591 }, { "epoch": 0.23607355811413325, "grad_norm": 2.413892984390259, "learning_rate": 0.00017854291192129812, "loss": 1.6526, "step": 6592 }, { "epoch": 0.23610937024370154, "grad_norm": 1.274664282798767, "learning_rate": 0.00017853573216520684, "loss": 1.6259, "step": 6593 }, { "epoch": 0.2361451823732698, "grad_norm": 1.5732338428497314, "learning_rate": 0.00017852855135251864, "loss": 1.2351, "step": 6594 }, { "epoch": 0.2361809945028381, "grad_norm": 1.7816098928451538, "learning_rate": 0.00017852136948333006, "loss": 1.6006, "step": 6595 }, { "epoch": 0.2362168066324064, "grad_norm": 1.5788737535476685, "learning_rate": 0.00017851418655773772, "loss": 1.5484, "step": 6596 }, { "epoch": 0.23625261876197468, "grad_norm": 1.6438406705856323, "learning_rate": 0.00017850700257583828, "loss": 1.704, "step": 6597 }, { "epoch": 0.23628843089154297, "grad_norm": 1.7204110622406006, "learning_rate": 0.00017849981753772836, "loss": 1.6303, "step": 6598 }, { "epoch": 0.23632424302111124, "grad_norm": 1.7380869388580322, "learning_rate": 0.0001784926314435047, "loss": 1.7262, "step": 6599 }, { "epoch": 0.23636005515067954, "grad_norm": 2.4021716117858887, "learning_rate": 0.00017848544429326392, "loss": 1.4997, "step": 6600 }, { "epoch": 0.2363958672802478, "grad_norm": 2.6693549156188965, "learning_rate": 0.00017847825608710273, "loss": 1.4439, "step": 6601 }, { "epoch": 0.2364316794098161, "grad_norm": 1.5793838500976562, "learning_rate": 0.00017847106682511782, "loss": 1.519, "step": 6602 }, { "epoch": 0.2364674915393844, "grad_norm": 2.942883253097534, "learning_rate": 0.00017846387650740592, "loss": 1.8072, "step": 6603 }, { "epoch": 0.23650330366895267, "grad_norm": 1.5325664281845093, "learning_rate": 0.00017845668513406378, "loss": 1.3556, "step": 6604 }, { "epoch": 0.23653911579852097, "grad_norm": 1.4211392402648926, "learning_rate": 0.00017844949270518816, "loss": 1.2079, "step": 6605 }, { "epoch": 0.23657492792808923, "grad_norm": 1.4556447267532349, "learning_rate": 0.00017844229922087582, "loss": 1.477, "step": 6606 }, { "epoch": 0.23661074005765753, "grad_norm": 2.2137489318847656, "learning_rate": 0.00017843510468122347, "loss": 1.6519, "step": 6607 }, { "epoch": 0.2366465521872258, "grad_norm": 1.6451637744903564, "learning_rate": 0.00017842790908632802, "loss": 1.4015, "step": 6608 }, { "epoch": 0.2366823643167941, "grad_norm": 1.9870620965957642, "learning_rate": 0.00017842071243628617, "loss": 1.3847, "step": 6609 }, { "epoch": 0.2367181764463624, "grad_norm": 1.9691691398620605, "learning_rate": 0.0001784135147311948, "loss": 1.5867, "step": 6610 }, { "epoch": 0.23675398857593066, "grad_norm": 1.8930854797363281, "learning_rate": 0.00017840631597115076, "loss": 1.7622, "step": 6611 }, { "epoch": 0.23678980070549896, "grad_norm": 1.7381625175476074, "learning_rate": 0.00017839911615625086, "loss": 1.5428, "step": 6612 }, { "epoch": 0.23682561283506723, "grad_norm": 1.6592036485671997, "learning_rate": 0.00017839191528659198, "loss": 1.6095, "step": 6613 }, { "epoch": 0.23686142496463553, "grad_norm": 1.5722460746765137, "learning_rate": 0.000178384713362271, "loss": 1.529, "step": 6614 }, { "epoch": 0.2368972370942038, "grad_norm": 1.8130706548690796, "learning_rate": 0.00017837751038338482, "loss": 1.619, "step": 6615 }, { "epoch": 0.2369330492237721, "grad_norm": 1.874606728553772, "learning_rate": 0.00017837030635003032, "loss": 1.675, "step": 6616 }, { "epoch": 0.2369688613533404, "grad_norm": 1.4484257698059082, "learning_rate": 0.00017836310126230444, "loss": 1.4822, "step": 6617 }, { "epoch": 0.23700467348290866, "grad_norm": 1.4785351753234863, "learning_rate": 0.00017835589512030413, "loss": 1.6452, "step": 6618 }, { "epoch": 0.23704048561247695, "grad_norm": 1.5347731113433838, "learning_rate": 0.00017834868792412632, "loss": 1.4137, "step": 6619 }, { "epoch": 0.23707629774204522, "grad_norm": 1.8363127708435059, "learning_rate": 0.00017834147967386797, "loss": 1.3594, "step": 6620 }, { "epoch": 0.23711210987161352, "grad_norm": 2.004275321960449, "learning_rate": 0.00017833427036962604, "loss": 1.6062, "step": 6621 }, { "epoch": 0.2371479220011818, "grad_norm": 1.5115212202072144, "learning_rate": 0.0001783270600114976, "loss": 1.4327, "step": 6622 }, { "epoch": 0.23718373413075008, "grad_norm": 1.6097291707992554, "learning_rate": 0.00017831984859957955, "loss": 1.4477, "step": 6623 }, { "epoch": 0.23721954626031838, "grad_norm": 1.896243929862976, "learning_rate": 0.00017831263613396898, "loss": 1.7308, "step": 6624 }, { "epoch": 0.23725535838988665, "grad_norm": 2.4240522384643555, "learning_rate": 0.0001783054226147629, "loss": 1.4856, "step": 6625 }, { "epoch": 0.23729117051945495, "grad_norm": 1.8697086572647095, "learning_rate": 0.0001782982080420584, "loss": 1.3909, "step": 6626 }, { "epoch": 0.23732698264902322, "grad_norm": 1.623146891593933, "learning_rate": 0.00017829099241595245, "loss": 1.6915, "step": 6627 }, { "epoch": 0.2373627947785915, "grad_norm": 1.2264447212219238, "learning_rate": 0.00017828377573654225, "loss": 1.7342, "step": 6628 }, { "epoch": 0.23739860690815978, "grad_norm": 1.5856890678405762, "learning_rate": 0.00017827655800392478, "loss": 1.7977, "step": 6629 }, { "epoch": 0.23743441903772808, "grad_norm": 1.787320852279663, "learning_rate": 0.00017826933921819723, "loss": 1.4515, "step": 6630 }, { "epoch": 0.23747023116729638, "grad_norm": 1.6283372640609741, "learning_rate": 0.00017826211937945665, "loss": 1.9511, "step": 6631 }, { "epoch": 0.23750604329686464, "grad_norm": 1.7102324962615967, "learning_rate": 0.00017825489848780022, "loss": 1.6524, "step": 6632 }, { "epoch": 0.23754185542643294, "grad_norm": 2.1773719787597656, "learning_rate": 0.00017824767654332505, "loss": 1.7596, "step": 6633 }, { "epoch": 0.2375776675560012, "grad_norm": 1.3473145961761475, "learning_rate": 0.00017824045354612836, "loss": 1.2803, "step": 6634 }, { "epoch": 0.2376134796855695, "grad_norm": 1.546225666999817, "learning_rate": 0.00017823322949630727, "loss": 1.5292, "step": 6635 }, { "epoch": 0.23764929181513778, "grad_norm": 1.7746154069900513, "learning_rate": 0.000178226004393959, "loss": 1.4978, "step": 6636 }, { "epoch": 0.23768510394470607, "grad_norm": 1.3727898597717285, "learning_rate": 0.0001782187782391807, "loss": 1.3965, "step": 6637 }, { "epoch": 0.23772091607427437, "grad_norm": 2.398136615753174, "learning_rate": 0.0001782115510320697, "loss": 1.827, "step": 6638 }, { "epoch": 0.23775672820384264, "grad_norm": 1.6898165941238403, "learning_rate": 0.00017820432277272313, "loss": 1.5812, "step": 6639 }, { "epoch": 0.23779254033341093, "grad_norm": 1.2213449478149414, "learning_rate": 0.00017819709346123826, "loss": 1.6483, "step": 6640 }, { "epoch": 0.2378283524629792, "grad_norm": 1.784114956855774, "learning_rate": 0.0001781898630977124, "loss": 1.5349, "step": 6641 }, { "epoch": 0.2378641645925475, "grad_norm": 1.8772135972976685, "learning_rate": 0.00017818263168224276, "loss": 1.5394, "step": 6642 }, { "epoch": 0.23789997672211577, "grad_norm": 2.150813341140747, "learning_rate": 0.0001781753992149267, "loss": 1.554, "step": 6643 }, { "epoch": 0.23793578885168407, "grad_norm": 1.4352753162384033, "learning_rate": 0.00017816816569586144, "loss": 1.7107, "step": 6644 }, { "epoch": 0.23797160098125236, "grad_norm": 1.6923801898956299, "learning_rate": 0.00017816093112514437, "loss": 1.5776, "step": 6645 }, { "epoch": 0.23800741311082063, "grad_norm": 1.5510084629058838, "learning_rate": 0.00017815369550287278, "loss": 1.5203, "step": 6646 }, { "epoch": 0.23804322524038893, "grad_norm": 1.1947929859161377, "learning_rate": 0.00017814645882914402, "loss": 1.6705, "step": 6647 }, { "epoch": 0.2380790373699572, "grad_norm": 1.9524680376052856, "learning_rate": 0.00017813922110405548, "loss": 1.7717, "step": 6648 }, { "epoch": 0.2381148494995255, "grad_norm": 1.318559169769287, "learning_rate": 0.00017813198232770447, "loss": 1.445, "step": 6649 }, { "epoch": 0.23815066162909376, "grad_norm": 1.6523274183273315, "learning_rate": 0.00017812474250018844, "loss": 1.4655, "step": 6650 }, { "epoch": 0.23818647375866206, "grad_norm": 1.3412474393844604, "learning_rate": 0.00017811750162160478, "loss": 1.4733, "step": 6651 }, { "epoch": 0.23822228588823036, "grad_norm": 1.291676640510559, "learning_rate": 0.00017811025969205092, "loss": 1.3368, "step": 6652 }, { "epoch": 0.23825809801779863, "grad_norm": 1.5550686120986938, "learning_rate": 0.00017810301671162426, "loss": 1.7387, "step": 6653 }, { "epoch": 0.23829391014736692, "grad_norm": 1.3425484895706177, "learning_rate": 0.00017809577268042224, "loss": 1.5973, "step": 6654 }, { "epoch": 0.2383297222769352, "grad_norm": 1.645659327507019, "learning_rate": 0.00017808852759854235, "loss": 1.4145, "step": 6655 }, { "epoch": 0.2383655344065035, "grad_norm": 1.3277592658996582, "learning_rate": 0.00017808128146608204, "loss": 1.4578, "step": 6656 }, { "epoch": 0.23840134653607176, "grad_norm": 1.8686926364898682, "learning_rate": 0.0001780740342831388, "loss": 1.5942, "step": 6657 }, { "epoch": 0.23843715866564005, "grad_norm": 1.322493553161621, "learning_rate": 0.00017806678604981012, "loss": 1.2065, "step": 6658 }, { "epoch": 0.23847297079520835, "grad_norm": 1.6285942792892456, "learning_rate": 0.00017805953676619356, "loss": 1.6118, "step": 6659 }, { "epoch": 0.23850878292477662, "grad_norm": 1.799084186553955, "learning_rate": 0.00017805228643238662, "loss": 1.5807, "step": 6660 }, { "epoch": 0.23854459505434492, "grad_norm": 2.529820680618286, "learning_rate": 0.00017804503504848684, "loss": 1.4212, "step": 6661 }, { "epoch": 0.23858040718391318, "grad_norm": 2.7669854164123535, "learning_rate": 0.00017803778261459181, "loss": 1.5821, "step": 6662 }, { "epoch": 0.23861621931348148, "grad_norm": 1.924968957901001, "learning_rate": 0.00017803052913079905, "loss": 1.3714, "step": 6663 }, { "epoch": 0.23865203144304975, "grad_norm": 1.2607649564743042, "learning_rate": 0.0001780232745972062, "loss": 1.3695, "step": 6664 }, { "epoch": 0.23868784357261805, "grad_norm": 1.7694482803344727, "learning_rate": 0.00017801601901391078, "loss": 1.433, "step": 6665 }, { "epoch": 0.23872365570218634, "grad_norm": 1.6368229389190674, "learning_rate": 0.0001780087623810105, "loss": 1.8103, "step": 6666 }, { "epoch": 0.2387594678317546, "grad_norm": 1.6224684715270996, "learning_rate": 0.00017800150469860293, "loss": 1.7134, "step": 6667 }, { "epoch": 0.2387952799613229, "grad_norm": 1.4222160577774048, "learning_rate": 0.00017799424596678573, "loss": 1.5797, "step": 6668 }, { "epoch": 0.23883109209089118, "grad_norm": 1.7973840236663818, "learning_rate": 0.00017798698618565653, "loss": 1.552, "step": 6669 }, { "epoch": 0.23886690422045948, "grad_norm": 1.7325069904327393, "learning_rate": 0.0001779797253553131, "loss": 1.9692, "step": 6670 }, { "epoch": 0.23890271635002774, "grad_norm": 1.5968601703643799, "learning_rate": 0.000177972463475853, "loss": 1.68, "step": 6671 }, { "epoch": 0.23893852847959604, "grad_norm": 1.8423383235931396, "learning_rate": 0.000177965200547374, "loss": 1.8313, "step": 6672 }, { "epoch": 0.23897434060916434, "grad_norm": 2.4150888919830322, "learning_rate": 0.00017795793656997377, "loss": 1.3579, "step": 6673 }, { "epoch": 0.2390101527387326, "grad_norm": 1.754241943359375, "learning_rate": 0.00017795067154375007, "loss": 1.4763, "step": 6674 }, { "epoch": 0.2390459648683009, "grad_norm": 1.7057687044143677, "learning_rate": 0.00017794340546880064, "loss": 1.5102, "step": 6675 }, { "epoch": 0.23908177699786917, "grad_norm": 1.922208309173584, "learning_rate": 0.00017793613834522326, "loss": 1.4778, "step": 6676 }, { "epoch": 0.23911758912743747, "grad_norm": 1.225993037223816, "learning_rate": 0.0001779288701731156, "loss": 1.5574, "step": 6677 }, { "epoch": 0.23915340125700574, "grad_norm": 2.4143288135528564, "learning_rate": 0.00017792160095257556, "loss": 1.6166, "step": 6678 }, { "epoch": 0.23918921338657403, "grad_norm": 1.4441640377044678, "learning_rate": 0.00017791433068370087, "loss": 1.5373, "step": 6679 }, { "epoch": 0.23922502551614233, "grad_norm": 1.7802202701568604, "learning_rate": 0.00017790705936658938, "loss": 1.1986, "step": 6680 }, { "epoch": 0.2392608376457106, "grad_norm": 2.0310826301574707, "learning_rate": 0.00017789978700133888, "loss": 1.8544, "step": 6681 }, { "epoch": 0.2392966497752789, "grad_norm": 1.6153504848480225, "learning_rate": 0.00017789251358804725, "loss": 1.5214, "step": 6682 }, { "epoch": 0.23933246190484717, "grad_norm": 1.5683387517929077, "learning_rate": 0.00017788523912681231, "loss": 1.5966, "step": 6683 }, { "epoch": 0.23936827403441546, "grad_norm": 2.497631072998047, "learning_rate": 0.00017787796361773197, "loss": 1.4201, "step": 6684 }, { "epoch": 0.23940408616398373, "grad_norm": 1.5542875528335571, "learning_rate": 0.00017787068706090405, "loss": 1.6915, "step": 6685 }, { "epoch": 0.23943989829355203, "grad_norm": 1.6513056755065918, "learning_rate": 0.0001778634094564265, "loss": 1.4809, "step": 6686 }, { "epoch": 0.2394757104231203, "grad_norm": 2.319539785385132, "learning_rate": 0.0001778561308043972, "loss": 1.4589, "step": 6687 }, { "epoch": 0.2395115225526886, "grad_norm": 1.8904774188995361, "learning_rate": 0.00017784885110491412, "loss": 1.6562, "step": 6688 }, { "epoch": 0.2395473346822569, "grad_norm": 1.582964539527893, "learning_rate": 0.00017784157035807515, "loss": 1.6561, "step": 6689 }, { "epoch": 0.23958314681182516, "grad_norm": 1.625274896621704, "learning_rate": 0.00017783428856397825, "loss": 1.4374, "step": 6690 }, { "epoch": 0.23961895894139346, "grad_norm": 1.5694506168365479, "learning_rate": 0.00017782700572272137, "loss": 1.6288, "step": 6691 }, { "epoch": 0.23965477107096173, "grad_norm": 2.2098121643066406, "learning_rate": 0.00017781972183440254, "loss": 1.6151, "step": 6692 }, { "epoch": 0.23969058320053002, "grad_norm": 2.150939464569092, "learning_rate": 0.00017781243689911973, "loss": 1.6039, "step": 6693 }, { "epoch": 0.2397263953300983, "grad_norm": 1.4807977676391602, "learning_rate": 0.00017780515091697096, "loss": 1.7675, "step": 6694 }, { "epoch": 0.2397622074596666, "grad_norm": 1.5989789962768555, "learning_rate": 0.00017779786388805424, "loss": 1.3923, "step": 6695 }, { "epoch": 0.23979801958923488, "grad_norm": 1.3950860500335693, "learning_rate": 0.00017779057581246763, "loss": 1.4481, "step": 6696 }, { "epoch": 0.23983383171880315, "grad_norm": 1.7480555772781372, "learning_rate": 0.00017778328669030918, "loss": 2.0431, "step": 6697 }, { "epoch": 0.23986964384837145, "grad_norm": 1.7169839143753052, "learning_rate": 0.0001777759965216769, "loss": 1.5416, "step": 6698 }, { "epoch": 0.23990545597793972, "grad_norm": 1.6872042417526245, "learning_rate": 0.0001777687053066689, "loss": 1.726, "step": 6699 }, { "epoch": 0.23994126810750802, "grad_norm": 1.3428165912628174, "learning_rate": 0.00017776141304538332, "loss": 1.4027, "step": 6700 }, { "epoch": 0.23997708023707628, "grad_norm": 1.719918131828308, "learning_rate": 0.00017775411973791822, "loss": 1.6605, "step": 6701 }, { "epoch": 0.24001289236664458, "grad_norm": 1.745969533920288, "learning_rate": 0.00017774682538437175, "loss": 1.4542, "step": 6702 }, { "epoch": 0.24004870449621288, "grad_norm": 1.530691146850586, "learning_rate": 0.00017773952998484204, "loss": 1.5051, "step": 6703 }, { "epoch": 0.24008451662578115, "grad_norm": 1.7143809795379639, "learning_rate": 0.0001777322335394272, "loss": 1.7822, "step": 6704 }, { "epoch": 0.24012032875534944, "grad_norm": 2.3987367153167725, "learning_rate": 0.00017772493604822543, "loss": 1.5643, "step": 6705 }, { "epoch": 0.2401561408849177, "grad_norm": 1.9206353425979614, "learning_rate": 0.00017771763751133488, "loss": 1.6011, "step": 6706 }, { "epoch": 0.240191953014486, "grad_norm": 1.3068255186080933, "learning_rate": 0.0001777103379288538, "loss": 1.3736, "step": 6707 }, { "epoch": 0.24022776514405428, "grad_norm": 1.405949592590332, "learning_rate": 0.00017770303730088035, "loss": 1.5704, "step": 6708 }, { "epoch": 0.24026357727362257, "grad_norm": 2.038421392440796, "learning_rate": 0.00017769573562751275, "loss": 1.5038, "step": 6709 }, { "epoch": 0.24029938940319087, "grad_norm": 1.7716072797775269, "learning_rate": 0.0001776884329088493, "loss": 1.7277, "step": 6710 }, { "epoch": 0.24033520153275914, "grad_norm": 1.2767550945281982, "learning_rate": 0.00017768112914498817, "loss": 1.2572, "step": 6711 }, { "epoch": 0.24037101366232744, "grad_norm": 2.466627836227417, "learning_rate": 0.00017767382433602762, "loss": 1.4945, "step": 6712 }, { "epoch": 0.2404068257918957, "grad_norm": 1.978790521621704, "learning_rate": 0.00017766651848206597, "loss": 1.8079, "step": 6713 }, { "epoch": 0.240442637921464, "grad_norm": 1.4621291160583496, "learning_rate": 0.00017765921158320152, "loss": 1.6702, "step": 6714 }, { "epoch": 0.24047845005103227, "grad_norm": 1.7895176410675049, "learning_rate": 0.00017765190363953253, "loss": 1.2385, "step": 6715 }, { "epoch": 0.24051426218060057, "grad_norm": 1.5651522874832153, "learning_rate": 0.00017764459465115736, "loss": 1.6956, "step": 6716 }, { "epoch": 0.24055007431016887, "grad_norm": 1.6989222764968872, "learning_rate": 0.0001776372846181743, "loss": 1.5337, "step": 6717 }, { "epoch": 0.24058588643973713, "grad_norm": 2.1151821613311768, "learning_rate": 0.00017762997354068172, "loss": 1.8988, "step": 6718 }, { "epoch": 0.24062169856930543, "grad_norm": 2.0804238319396973, "learning_rate": 0.00017762266141877796, "loss": 1.4169, "step": 6719 }, { "epoch": 0.2406575106988737, "grad_norm": 1.9376277923583984, "learning_rate": 0.00017761534825256144, "loss": 1.5997, "step": 6720 }, { "epoch": 0.240693322828442, "grad_norm": 1.5434688329696655, "learning_rate": 0.00017760803404213052, "loss": 1.0933, "step": 6721 }, { "epoch": 0.24072913495801027, "grad_norm": 2.0929014682769775, "learning_rate": 0.00017760071878758363, "loss": 1.4231, "step": 6722 }, { "epoch": 0.24076494708757856, "grad_norm": 2.1523287296295166, "learning_rate": 0.00017759340248901917, "loss": 1.7028, "step": 6723 }, { "epoch": 0.24080075921714686, "grad_norm": 1.568102240562439, "learning_rate": 0.00017758608514653555, "loss": 1.4582, "step": 6724 }, { "epoch": 0.24083657134671513, "grad_norm": 1.5287367105484009, "learning_rate": 0.00017757876676023125, "loss": 1.503, "step": 6725 }, { "epoch": 0.24087238347628342, "grad_norm": 1.5015430450439453, "learning_rate": 0.0001775714473302047, "loss": 1.4269, "step": 6726 }, { "epoch": 0.2409081956058517, "grad_norm": 1.2125335931777954, "learning_rate": 0.0001775641268565544, "loss": 1.5207, "step": 6727 }, { "epoch": 0.24094400773542, "grad_norm": 2.623624086380005, "learning_rate": 0.0001775568053393788, "loss": 1.7906, "step": 6728 }, { "epoch": 0.24097981986498826, "grad_norm": 1.626744270324707, "learning_rate": 0.00017754948277877642, "loss": 1.6469, "step": 6729 }, { "epoch": 0.24101563199455656, "grad_norm": 1.9941731691360474, "learning_rate": 0.0001775421591748458, "loss": 1.5711, "step": 6730 }, { "epoch": 0.24105144412412485, "grad_norm": 1.4977132081985474, "learning_rate": 0.00017753483452768545, "loss": 1.5056, "step": 6731 }, { "epoch": 0.24108725625369312, "grad_norm": 1.9533582925796509, "learning_rate": 0.0001775275088373939, "loss": 1.5436, "step": 6732 }, { "epoch": 0.24112306838326142, "grad_norm": 2.0875680446624756, "learning_rate": 0.00017752018210406972, "loss": 1.8286, "step": 6733 }, { "epoch": 0.2411588805128297, "grad_norm": 1.3487203121185303, "learning_rate": 0.00017751285432781152, "loss": 1.483, "step": 6734 }, { "epoch": 0.24119469264239798, "grad_norm": 1.8331043720245361, "learning_rate": 0.00017750552550871782, "loss": 1.6495, "step": 6735 }, { "epoch": 0.24123050477196625, "grad_norm": 1.6515965461730957, "learning_rate": 0.00017749819564688725, "loss": 1.3278, "step": 6736 }, { "epoch": 0.24126631690153455, "grad_norm": 1.197119116783142, "learning_rate": 0.00017749086474241844, "loss": 1.5107, "step": 6737 }, { "epoch": 0.24130212903110285, "grad_norm": 2.1350796222686768, "learning_rate": 0.00017748353279540999, "loss": 1.705, "step": 6738 }, { "epoch": 0.24133794116067112, "grad_norm": 1.517443060874939, "learning_rate": 0.00017747619980596055, "loss": 1.6448, "step": 6739 }, { "epoch": 0.2413737532902394, "grad_norm": 1.9738852977752686, "learning_rate": 0.00017746886577416876, "loss": 1.4471, "step": 6740 }, { "epoch": 0.24140956541980768, "grad_norm": 1.6398792266845703, "learning_rate": 0.00017746153070013335, "loss": 1.7618, "step": 6741 }, { "epoch": 0.24144537754937598, "grad_norm": 1.5546917915344238, "learning_rate": 0.00017745419458395294, "loss": 1.7391, "step": 6742 }, { "epoch": 0.24148118967894425, "grad_norm": 1.7060317993164062, "learning_rate": 0.00017744685742572625, "loss": 1.9142, "step": 6743 }, { "epoch": 0.24151700180851254, "grad_norm": 1.6105141639709473, "learning_rate": 0.000177439519225552, "loss": 1.5765, "step": 6744 }, { "epoch": 0.24155281393808084, "grad_norm": 1.5093984603881836, "learning_rate": 0.0001774321799835289, "loss": 1.6196, "step": 6745 }, { "epoch": 0.2415886260676491, "grad_norm": 1.6497421264648438, "learning_rate": 0.00017742483969975572, "loss": 1.575, "step": 6746 }, { "epoch": 0.2416244381972174, "grad_norm": 1.5175936222076416, "learning_rate": 0.00017741749837433117, "loss": 1.6146, "step": 6747 }, { "epoch": 0.24166025032678567, "grad_norm": 1.6494495868682861, "learning_rate": 0.00017741015600735403, "loss": 1.4422, "step": 6748 }, { "epoch": 0.24169606245635397, "grad_norm": 1.5587164163589478, "learning_rate": 0.0001774028125989231, "loss": 1.7001, "step": 6749 }, { "epoch": 0.24173187458592224, "grad_norm": 2.176522970199585, "learning_rate": 0.00017739546814913722, "loss": 1.6531, "step": 6750 }, { "epoch": 0.24176768671549054, "grad_norm": 1.236741065979004, "learning_rate": 0.00017738812265809508, "loss": 1.5294, "step": 6751 }, { "epoch": 0.24180349884505883, "grad_norm": 1.8967229127883911, "learning_rate": 0.0001773807761258956, "loss": 1.4595, "step": 6752 }, { "epoch": 0.2418393109746271, "grad_norm": 2.09911847114563, "learning_rate": 0.0001773734285526376, "loss": 1.4782, "step": 6753 }, { "epoch": 0.2418751231041954, "grad_norm": 1.3407038450241089, "learning_rate": 0.0001773660799384199, "loss": 1.4959, "step": 6754 }, { "epoch": 0.24191093523376367, "grad_norm": 1.8099561929702759, "learning_rate": 0.0001773587302833414, "loss": 1.2301, "step": 6755 }, { "epoch": 0.24194674736333197, "grad_norm": 1.691125750541687, "learning_rate": 0.000177351379587501, "loss": 1.3265, "step": 6756 }, { "epoch": 0.24198255949290023, "grad_norm": 1.1911656856536865, "learning_rate": 0.0001773440278509975, "loss": 1.7656, "step": 6757 }, { "epoch": 0.24201837162246853, "grad_norm": 1.8270931243896484, "learning_rate": 0.00017733667507392991, "loss": 1.5717, "step": 6758 }, { "epoch": 0.24205418375203683, "grad_norm": 2.1644065380096436, "learning_rate": 0.00017732932125639713, "loss": 1.6089, "step": 6759 }, { "epoch": 0.2420899958816051, "grad_norm": 1.8169310092926025, "learning_rate": 0.00017732196639849804, "loss": 1.6283, "step": 6760 }, { "epoch": 0.2421258080111734, "grad_norm": 1.8802064657211304, "learning_rate": 0.0001773146105003317, "loss": 1.5327, "step": 6761 }, { "epoch": 0.24216162014074166, "grad_norm": 1.2530100345611572, "learning_rate": 0.00017730725356199692, "loss": 1.553, "step": 6762 }, { "epoch": 0.24219743227030996, "grad_norm": 1.9466575384140015, "learning_rate": 0.0001772998955835928, "loss": 1.7341, "step": 6763 }, { "epoch": 0.24223324439987823, "grad_norm": 1.4589053392410278, "learning_rate": 0.00017729253656521832, "loss": 1.4459, "step": 6764 }, { "epoch": 0.24226905652944652, "grad_norm": 1.6372430324554443, "learning_rate": 0.00017728517650697243, "loss": 1.6402, "step": 6765 }, { "epoch": 0.24230486865901482, "grad_norm": 1.8554078340530396, "learning_rate": 0.0001772778154089542, "loss": 1.511, "step": 6766 }, { "epoch": 0.2423406807885831, "grad_norm": 1.3996329307556152, "learning_rate": 0.0001772704532712626, "loss": 1.6167, "step": 6767 }, { "epoch": 0.2423764929181514, "grad_norm": 1.9406613111495972, "learning_rate": 0.00017726309009399676, "loss": 1.6832, "step": 6768 }, { "epoch": 0.24241230504771966, "grad_norm": 2.206716537475586, "learning_rate": 0.0001772557258772557, "loss": 1.6634, "step": 6769 }, { "epoch": 0.24244811717728795, "grad_norm": 1.4321078062057495, "learning_rate": 0.0001772483606211385, "loss": 1.4673, "step": 6770 }, { "epoch": 0.24248392930685622, "grad_norm": 1.2396024465560913, "learning_rate": 0.00017724099432574425, "loss": 1.5565, "step": 6771 }, { "epoch": 0.24251974143642452, "grad_norm": 1.692460060119629, "learning_rate": 0.00017723362699117206, "loss": 1.8142, "step": 6772 }, { "epoch": 0.24255555356599282, "grad_norm": 1.9822334051132202, "learning_rate": 0.00017722625861752103, "loss": 1.6546, "step": 6773 }, { "epoch": 0.24259136569556108, "grad_norm": 1.494781255722046, "learning_rate": 0.0001772188892048903, "loss": 1.4441, "step": 6774 }, { "epoch": 0.24262717782512938, "grad_norm": 2.4597997665405273, "learning_rate": 0.00017721151875337907, "loss": 2.0706, "step": 6775 }, { "epoch": 0.24266298995469765, "grad_norm": 1.3796907663345337, "learning_rate": 0.00017720414726308642, "loss": 1.5801, "step": 6776 }, { "epoch": 0.24269880208426595, "grad_norm": 1.4934757947921753, "learning_rate": 0.00017719677473411154, "loss": 1.3307, "step": 6777 }, { "epoch": 0.24273461421383422, "grad_norm": 1.4481245279312134, "learning_rate": 0.00017718940116655363, "loss": 1.7379, "step": 6778 }, { "epoch": 0.2427704263434025, "grad_norm": 1.3896671533584595, "learning_rate": 0.00017718202656051194, "loss": 1.7022, "step": 6779 }, { "epoch": 0.2428062384729708, "grad_norm": 1.709444284439087, "learning_rate": 0.0001771746509160856, "loss": 1.4188, "step": 6780 }, { "epoch": 0.24284205060253908, "grad_norm": 1.5271943807601929, "learning_rate": 0.00017716727423337388, "loss": 1.4017, "step": 6781 }, { "epoch": 0.24287786273210737, "grad_norm": 1.7868486642837524, "learning_rate": 0.00017715989651247602, "loss": 1.6336, "step": 6782 }, { "epoch": 0.24291367486167564, "grad_norm": 2.2249338626861572, "learning_rate": 0.0001771525177534913, "loss": 1.4794, "step": 6783 }, { "epoch": 0.24294948699124394, "grad_norm": 1.2865608930587769, "learning_rate": 0.00017714513795651898, "loss": 1.4339, "step": 6784 }, { "epoch": 0.2429852991208122, "grad_norm": 1.402057409286499, "learning_rate": 0.00017713775712165832, "loss": 1.705, "step": 6785 }, { "epoch": 0.2430211112503805, "grad_norm": 1.5345312356948853, "learning_rate": 0.00017713037524900863, "loss": 1.6735, "step": 6786 }, { "epoch": 0.24305692337994877, "grad_norm": 1.4884132146835327, "learning_rate": 0.00017712299233866923, "loss": 1.5185, "step": 6787 }, { "epoch": 0.24309273550951707, "grad_norm": 2.092890977859497, "learning_rate": 0.0001771156083907395, "loss": 1.8618, "step": 6788 }, { "epoch": 0.24312854763908537, "grad_norm": 1.263377070426941, "learning_rate": 0.0001771082234053187, "loss": 1.5741, "step": 6789 }, { "epoch": 0.24316435976865364, "grad_norm": 1.8555021286010742, "learning_rate": 0.0001771008373825062, "loss": 1.4518, "step": 6790 }, { "epoch": 0.24320017189822193, "grad_norm": 1.67750084400177, "learning_rate": 0.0001770934503224014, "loss": 1.3955, "step": 6791 }, { "epoch": 0.2432359840277902, "grad_norm": 1.6171958446502686, "learning_rate": 0.00017708606222510367, "loss": 1.3687, "step": 6792 }, { "epoch": 0.2432717961573585, "grad_norm": 2.1286425590515137, "learning_rate": 0.0001770786730907124, "loss": 1.5347, "step": 6793 }, { "epoch": 0.24330760828692677, "grad_norm": 1.4012260437011719, "learning_rate": 0.00017707128291932702, "loss": 1.4511, "step": 6794 }, { "epoch": 0.24334342041649507, "grad_norm": 2.556178569793701, "learning_rate": 0.00017706389171104694, "loss": 1.8041, "step": 6795 }, { "epoch": 0.24337923254606336, "grad_norm": 2.018625259399414, "learning_rate": 0.00017705649946597157, "loss": 1.6411, "step": 6796 }, { "epoch": 0.24341504467563163, "grad_norm": 1.3456376791000366, "learning_rate": 0.00017704910618420044, "loss": 1.537, "step": 6797 }, { "epoch": 0.24345085680519993, "grad_norm": 1.720267653465271, "learning_rate": 0.00017704171186583295, "loss": 1.6337, "step": 6798 }, { "epoch": 0.2434866689347682, "grad_norm": 1.793156623840332, "learning_rate": 0.00017703431651096862, "loss": 1.9494, "step": 6799 }, { "epoch": 0.2435224810643365, "grad_norm": 2.458571195602417, "learning_rate": 0.00017702692011970693, "loss": 1.347, "step": 6800 }, { "epoch": 0.24355829319390476, "grad_norm": 2.3884029388427734, "learning_rate": 0.00017701952269214737, "loss": 1.5462, "step": 6801 }, { "epoch": 0.24359410532347306, "grad_norm": 2.0176198482513428, "learning_rate": 0.00017701212422838948, "loss": 1.5606, "step": 6802 }, { "epoch": 0.24362991745304136, "grad_norm": 2.216850519180298, "learning_rate": 0.00017700472472853283, "loss": 1.7962, "step": 6803 }, { "epoch": 0.24366572958260962, "grad_norm": 1.417450189590454, "learning_rate": 0.00017699732419267688, "loss": 1.4538, "step": 6804 }, { "epoch": 0.24370154171217792, "grad_norm": 2.4908945560455322, "learning_rate": 0.0001769899226209213, "loss": 1.3105, "step": 6805 }, { "epoch": 0.2437373538417462, "grad_norm": 1.8872681856155396, "learning_rate": 0.0001769825200133656, "loss": 1.6064, "step": 6806 }, { "epoch": 0.2437731659713145, "grad_norm": 1.579154133796692, "learning_rate": 0.00017697511637010938, "loss": 1.2872, "step": 6807 }, { "epoch": 0.24380897810088276, "grad_norm": 2.0289571285247803, "learning_rate": 0.0001769677116912523, "loss": 1.4756, "step": 6808 }, { "epoch": 0.24384479023045105, "grad_norm": 1.60190749168396, "learning_rate": 0.00017696030597689393, "loss": 1.501, "step": 6809 }, { "epoch": 0.24388060236001935, "grad_norm": 1.8626149892807007, "learning_rate": 0.00017695289922713389, "loss": 1.4009, "step": 6810 }, { "epoch": 0.24391641448958762, "grad_norm": 1.8006678819656372, "learning_rate": 0.00017694549144207185, "loss": 1.6877, "step": 6811 }, { "epoch": 0.24395222661915592, "grad_norm": 1.565643310546875, "learning_rate": 0.0001769380826218075, "loss": 1.6743, "step": 6812 }, { "epoch": 0.24398803874872418, "grad_norm": 1.7469196319580078, "learning_rate": 0.00017693067276644049, "loss": 1.6445, "step": 6813 }, { "epoch": 0.24402385087829248, "grad_norm": 1.8169236183166504, "learning_rate": 0.00017692326187607052, "loss": 1.4561, "step": 6814 }, { "epoch": 0.24405966300786075, "grad_norm": 1.2375288009643555, "learning_rate": 0.00017691584995079725, "loss": 1.2383, "step": 6815 }, { "epoch": 0.24409547513742905, "grad_norm": 1.8225083351135254, "learning_rate": 0.00017690843699072045, "loss": 1.6827, "step": 6816 }, { "epoch": 0.24413128726699734, "grad_norm": 1.7936789989471436, "learning_rate": 0.00017690102299593985, "loss": 1.7272, "step": 6817 }, { "epoch": 0.2441670993965656, "grad_norm": 1.4096508026123047, "learning_rate": 0.00017689360796655515, "loss": 1.5091, "step": 6818 }, { "epoch": 0.2442029115261339, "grad_norm": 1.3659213781356812, "learning_rate": 0.00017688619190266616, "loss": 1.6082, "step": 6819 }, { "epoch": 0.24423872365570218, "grad_norm": 1.6657557487487793, "learning_rate": 0.00017687877480437262, "loss": 1.5673, "step": 6820 }, { "epoch": 0.24427453578527047, "grad_norm": 1.459875226020813, "learning_rate": 0.00017687135667177436, "loss": 1.6767, "step": 6821 }, { "epoch": 0.24431034791483874, "grad_norm": 1.7852752208709717, "learning_rate": 0.00017686393750497112, "loss": 1.5721, "step": 6822 }, { "epoch": 0.24434616004440704, "grad_norm": 1.7470086812973022, "learning_rate": 0.0001768565173040628, "loss": 1.3168, "step": 6823 }, { "epoch": 0.24438197217397534, "grad_norm": 2.435948371887207, "learning_rate": 0.0001768490960691491, "loss": 1.7345, "step": 6824 }, { "epoch": 0.2444177843035436, "grad_norm": 1.1866915225982666, "learning_rate": 0.00017684167380033002, "loss": 1.6174, "step": 6825 }, { "epoch": 0.2444535964331119, "grad_norm": 1.7603780031204224, "learning_rate": 0.00017683425049770527, "loss": 1.6618, "step": 6826 }, { "epoch": 0.24448940856268017, "grad_norm": 1.6207976341247559, "learning_rate": 0.00017682682616137484, "loss": 1.5455, "step": 6827 }, { "epoch": 0.24452522069224847, "grad_norm": 1.5510258674621582, "learning_rate": 0.00017681940079143855, "loss": 1.5884, "step": 6828 }, { "epoch": 0.24456103282181674, "grad_norm": 1.862894892692566, "learning_rate": 0.0001768119743879963, "loss": 1.4781, "step": 6829 }, { "epoch": 0.24459684495138503, "grad_norm": 1.6464170217514038, "learning_rate": 0.00017680454695114802, "loss": 1.7318, "step": 6830 }, { "epoch": 0.24463265708095333, "grad_norm": 1.6220334768295288, "learning_rate": 0.00017679711848099362, "loss": 1.4225, "step": 6831 }, { "epoch": 0.2446684692105216, "grad_norm": 1.9520400762557983, "learning_rate": 0.0001767896889776331, "loss": 1.8062, "step": 6832 }, { "epoch": 0.2447042813400899, "grad_norm": 1.7717475891113281, "learning_rate": 0.00017678225844116628, "loss": 1.5246, "step": 6833 }, { "epoch": 0.24474009346965817, "grad_norm": 1.6127135753631592, "learning_rate": 0.00017677482687169328, "loss": 1.6002, "step": 6834 }, { "epoch": 0.24477590559922646, "grad_norm": 1.3745635747909546, "learning_rate": 0.000176767394269314, "loss": 1.6012, "step": 6835 }, { "epoch": 0.24481171772879473, "grad_norm": 1.3935896158218384, "learning_rate": 0.00017675996063412844, "loss": 1.5386, "step": 6836 }, { "epoch": 0.24484752985836303, "grad_norm": 1.7197144031524658, "learning_rate": 0.00017675252596623665, "loss": 1.7675, "step": 6837 }, { "epoch": 0.24488334198793132, "grad_norm": 1.8122268915176392, "learning_rate": 0.00017674509026573864, "loss": 1.7868, "step": 6838 }, { "epoch": 0.2449191541174996, "grad_norm": 1.5085411071777344, "learning_rate": 0.00017673765353273438, "loss": 1.6055, "step": 6839 }, { "epoch": 0.2449549662470679, "grad_norm": 2.0807392597198486, "learning_rate": 0.00017673021576732404, "loss": 1.5603, "step": 6840 }, { "epoch": 0.24499077837663616, "grad_norm": 2.034391164779663, "learning_rate": 0.00017672277696960756, "loss": 1.2525, "step": 6841 }, { "epoch": 0.24502659050620446, "grad_norm": 1.4948173761367798, "learning_rate": 0.0001767153371396851, "loss": 1.6105, "step": 6842 }, { "epoch": 0.24506240263577272, "grad_norm": 1.3254578113555908, "learning_rate": 0.00017670789627765676, "loss": 1.5827, "step": 6843 }, { "epoch": 0.24509821476534102, "grad_norm": 1.611053228378296, "learning_rate": 0.0001767004543836226, "loss": 1.7118, "step": 6844 }, { "epoch": 0.24513402689490932, "grad_norm": 1.7157574892044067, "learning_rate": 0.00017669301145768277, "loss": 1.6883, "step": 6845 }, { "epoch": 0.2451698390244776, "grad_norm": 1.8786518573760986, "learning_rate": 0.0001766855674999374, "loss": 1.5183, "step": 6846 }, { "epoch": 0.24520565115404588, "grad_norm": 1.7475110292434692, "learning_rate": 0.00017667812251048664, "loss": 1.5028, "step": 6847 }, { "epoch": 0.24524146328361415, "grad_norm": 1.989743709564209, "learning_rate": 0.00017667067648943064, "loss": 1.8715, "step": 6848 }, { "epoch": 0.24527727541318245, "grad_norm": 1.4464224576950073, "learning_rate": 0.00017666322943686957, "loss": 1.577, "step": 6849 }, { "epoch": 0.24531308754275072, "grad_norm": 2.7265331745147705, "learning_rate": 0.00017665578135290364, "loss": 1.4392, "step": 6850 }, { "epoch": 0.24534889967231902, "grad_norm": 2.068549156188965, "learning_rate": 0.00017664833223763306, "loss": 1.5901, "step": 6851 }, { "epoch": 0.2453847118018873, "grad_norm": 1.8748308420181274, "learning_rate": 0.00017664088209115805, "loss": 1.581, "step": 6852 }, { "epoch": 0.24542052393145558, "grad_norm": 1.8242149353027344, "learning_rate": 0.00017663343091357881, "loss": 1.8448, "step": 6853 }, { "epoch": 0.24545633606102388, "grad_norm": 1.4797003269195557, "learning_rate": 0.00017662597870499562, "loss": 1.4438, "step": 6854 }, { "epoch": 0.24549214819059215, "grad_norm": 1.5570054054260254, "learning_rate": 0.00017661852546550875, "loss": 1.6526, "step": 6855 }, { "epoch": 0.24552796032016044, "grad_norm": 1.3332992792129517, "learning_rate": 0.00017661107119521842, "loss": 1.5923, "step": 6856 }, { "epoch": 0.2455637724497287, "grad_norm": 2.3711464405059814, "learning_rate": 0.00017660361589422497, "loss": 1.4343, "step": 6857 }, { "epoch": 0.245599584579297, "grad_norm": 1.5084023475646973, "learning_rate": 0.00017659615956262865, "loss": 1.7877, "step": 6858 }, { "epoch": 0.2456353967088653, "grad_norm": 1.477497935295105, "learning_rate": 0.00017658870220052983, "loss": 1.5231, "step": 6859 }, { "epoch": 0.24567120883843357, "grad_norm": 1.6098153591156006, "learning_rate": 0.00017658124380802882, "loss": 1.4208, "step": 6860 }, { "epoch": 0.24570702096800187, "grad_norm": 1.4886384010314941, "learning_rate": 0.00017657378438522593, "loss": 1.6053, "step": 6861 }, { "epoch": 0.24574283309757014, "grad_norm": 1.6936547756195068, "learning_rate": 0.00017656632393222156, "loss": 1.426, "step": 6862 }, { "epoch": 0.24577864522713844, "grad_norm": 1.6345033645629883, "learning_rate": 0.00017655886244911603, "loss": 1.5935, "step": 6863 }, { "epoch": 0.2458144573567067, "grad_norm": 1.7725507020950317, "learning_rate": 0.00017655139993600982, "loss": 1.3166, "step": 6864 }, { "epoch": 0.245850269486275, "grad_norm": 1.4159860610961914, "learning_rate": 0.0001765439363930032, "loss": 1.5238, "step": 6865 }, { "epoch": 0.2458860816158433, "grad_norm": 1.7116299867630005, "learning_rate": 0.00017653647182019671, "loss": 1.7032, "step": 6866 }, { "epoch": 0.24592189374541157, "grad_norm": 2.9299378395080566, "learning_rate": 0.0001765290062176907, "loss": 1.5626, "step": 6867 }, { "epoch": 0.24595770587497987, "grad_norm": 1.746596336364746, "learning_rate": 0.00017652153958558562, "loss": 1.3949, "step": 6868 }, { "epoch": 0.24599351800454813, "grad_norm": 1.607316017150879, "learning_rate": 0.00017651407192398195, "loss": 1.4019, "step": 6869 }, { "epoch": 0.24602933013411643, "grad_norm": 1.8668955564498901, "learning_rate": 0.0001765066032329801, "loss": 1.5584, "step": 6870 }, { "epoch": 0.2460651422636847, "grad_norm": 1.5562617778778076, "learning_rate": 0.0001764991335126806, "loss": 1.4447, "step": 6871 }, { "epoch": 0.246100954393253, "grad_norm": 1.6031650304794312, "learning_rate": 0.000176491662763184, "loss": 1.603, "step": 6872 }, { "epoch": 0.2461367665228213, "grad_norm": 1.7676963806152344, "learning_rate": 0.0001764841909845907, "loss": 1.6027, "step": 6873 }, { "epoch": 0.24617257865238956, "grad_norm": 1.4618767499923706, "learning_rate": 0.00017647671817700122, "loss": 1.149, "step": 6874 }, { "epoch": 0.24620839078195786, "grad_norm": 1.703264832496643, "learning_rate": 0.00017646924434051617, "loss": 1.9018, "step": 6875 }, { "epoch": 0.24624420291152613, "grad_norm": 1.5716698169708252, "learning_rate": 0.0001764617694752361, "loss": 1.3323, "step": 6876 }, { "epoch": 0.24628001504109442, "grad_norm": 1.4548003673553467, "learning_rate": 0.00017645429358126156, "loss": 1.6873, "step": 6877 }, { "epoch": 0.2463158271706627, "grad_norm": 1.5308620929718018, "learning_rate": 0.0001764468166586931, "loss": 1.5299, "step": 6878 }, { "epoch": 0.246351639300231, "grad_norm": 1.557874083518982, "learning_rate": 0.00017643933870763133, "loss": 1.4238, "step": 6879 }, { "epoch": 0.2463874514297993, "grad_norm": 2.1845991611480713, "learning_rate": 0.00017643185972817684, "loss": 1.5245, "step": 6880 }, { "epoch": 0.24642326355936756, "grad_norm": 1.7284934520721436, "learning_rate": 0.0001764243797204303, "loss": 1.3871, "step": 6881 }, { "epoch": 0.24645907568893585, "grad_norm": 1.6655116081237793, "learning_rate": 0.0001764168986844923, "loss": 1.6806, "step": 6882 }, { "epoch": 0.24649488781850412, "grad_norm": 2.0219454765319824, "learning_rate": 0.00017640941662046345, "loss": 1.6322, "step": 6883 }, { "epoch": 0.24653069994807242, "grad_norm": 1.672935962677002, "learning_rate": 0.00017640193352844454, "loss": 1.532, "step": 6884 }, { "epoch": 0.2465665120776407, "grad_norm": 1.3515428304672241, "learning_rate": 0.00017639444940853612, "loss": 1.466, "step": 6885 }, { "epoch": 0.24660232420720898, "grad_norm": 1.778982162475586, "learning_rate": 0.00017638696426083893, "loss": 1.67, "step": 6886 }, { "epoch": 0.24663813633677725, "grad_norm": 1.437366008758545, "learning_rate": 0.00017637947808545369, "loss": 1.282, "step": 6887 }, { "epoch": 0.24667394846634555, "grad_norm": 1.3975059986114502, "learning_rate": 0.00017637199088248106, "loss": 1.7724, "step": 6888 }, { "epoch": 0.24670976059591385, "grad_norm": 1.6595691442489624, "learning_rate": 0.00017636450265202185, "loss": 1.5012, "step": 6889 }, { "epoch": 0.24674557272548212, "grad_norm": 1.3656624555587769, "learning_rate": 0.00017635701339417672, "loss": 1.3337, "step": 6890 }, { "epoch": 0.2467813848550504, "grad_norm": 1.6880717277526855, "learning_rate": 0.0001763495231090465, "loss": 1.8932, "step": 6891 }, { "epoch": 0.24681719698461868, "grad_norm": 1.7251242399215698, "learning_rate": 0.0001763420317967319, "loss": 1.3787, "step": 6892 }, { "epoch": 0.24685300911418698, "grad_norm": 1.4598206281661987, "learning_rate": 0.00017633453945733373, "loss": 1.3369, "step": 6893 }, { "epoch": 0.24688882124375525, "grad_norm": 2.0298573970794678, "learning_rate": 0.00017632704609095283, "loss": 1.7565, "step": 6894 }, { "epoch": 0.24692463337332354, "grad_norm": 1.364871859550476, "learning_rate": 0.00017631955169768998, "loss": 1.6694, "step": 6895 }, { "epoch": 0.24696044550289184, "grad_norm": 1.5108788013458252, "learning_rate": 0.00017631205627764598, "loss": 1.6086, "step": 6896 }, { "epoch": 0.2469962576324601, "grad_norm": 1.313675045967102, "learning_rate": 0.0001763045598309217, "loss": 1.7611, "step": 6897 }, { "epoch": 0.2470320697620284, "grad_norm": 1.818166732788086, "learning_rate": 0.00017629706235761802, "loss": 1.6188, "step": 6898 }, { "epoch": 0.24706788189159667, "grad_norm": 1.8856993913650513, "learning_rate": 0.00017628956385783577, "loss": 1.7872, "step": 6899 }, { "epoch": 0.24710369402116497, "grad_norm": 1.811854362487793, "learning_rate": 0.00017628206433167583, "loss": 1.6524, "step": 6900 }, { "epoch": 0.24713950615073324, "grad_norm": 1.3475021123886108, "learning_rate": 0.00017627456377923911, "loss": 1.4307, "step": 6901 }, { "epoch": 0.24717531828030154, "grad_norm": 1.7281248569488525, "learning_rate": 0.00017626706220062654, "loss": 1.6302, "step": 6902 }, { "epoch": 0.24721113040986983, "grad_norm": 2.6777141094207764, "learning_rate": 0.00017625955959593904, "loss": 1.6385, "step": 6903 }, { "epoch": 0.2472469425394381, "grad_norm": 1.415355920791626, "learning_rate": 0.0001762520559652775, "loss": 1.5923, "step": 6904 }, { "epoch": 0.2472827546690064, "grad_norm": 1.761030912399292, "learning_rate": 0.00017624455130874292, "loss": 1.6887, "step": 6905 }, { "epoch": 0.24731856679857467, "grad_norm": 1.5250886678695679, "learning_rate": 0.00017623704562643624, "loss": 1.7323, "step": 6906 }, { "epoch": 0.24735437892814297, "grad_norm": 1.85284423828125, "learning_rate": 0.00017622953891845847, "loss": 1.4674, "step": 6907 }, { "epoch": 0.24739019105771123, "grad_norm": 2.0614492893218994, "learning_rate": 0.00017622203118491055, "loss": 1.7599, "step": 6908 }, { "epoch": 0.24742600318727953, "grad_norm": 2.1460821628570557, "learning_rate": 0.00017621452242589354, "loss": 1.437, "step": 6909 }, { "epoch": 0.24746181531684783, "grad_norm": 1.4736385345458984, "learning_rate": 0.00017620701264150845, "loss": 1.7396, "step": 6910 }, { "epoch": 0.2474976274464161, "grad_norm": 1.7290641069412231, "learning_rate": 0.0001761995018318563, "loss": 1.508, "step": 6911 }, { "epoch": 0.2475334395759844, "grad_norm": 1.502830982208252, "learning_rate": 0.00017619198999703812, "loss": 1.455, "step": 6912 }, { "epoch": 0.24756925170555266, "grad_norm": 1.5334092378616333, "learning_rate": 0.00017618447713715503, "loss": 1.6101, "step": 6913 }, { "epoch": 0.24760506383512096, "grad_norm": 1.4206371307373047, "learning_rate": 0.00017617696325230805, "loss": 1.6806, "step": 6914 }, { "epoch": 0.24764087596468923, "grad_norm": 1.6280468702316284, "learning_rate": 0.0001761694483425983, "loss": 1.4521, "step": 6915 }, { "epoch": 0.24767668809425752, "grad_norm": 1.273987054824829, "learning_rate": 0.00017616193240812687, "loss": 1.5572, "step": 6916 }, { "epoch": 0.24771250022382582, "grad_norm": 1.521125078201294, "learning_rate": 0.00017615441544899488, "loss": 1.6782, "step": 6917 }, { "epoch": 0.2477483123533941, "grad_norm": 1.4935173988342285, "learning_rate": 0.00017614689746530345, "loss": 1.7162, "step": 6918 }, { "epoch": 0.2477841244829624, "grad_norm": 1.9287409782409668, "learning_rate": 0.00017613937845715376, "loss": 1.5272, "step": 6919 }, { "epoch": 0.24781993661253066, "grad_norm": 1.6758594512939453, "learning_rate": 0.00017613185842464693, "loss": 1.7733, "step": 6920 }, { "epoch": 0.24785574874209895, "grad_norm": 2.1583969593048096, "learning_rate": 0.00017612433736788417, "loss": 1.6371, "step": 6921 }, { "epoch": 0.24789156087166722, "grad_norm": 1.667779803276062, "learning_rate": 0.0001761168152869666, "loss": 1.5351, "step": 6922 }, { "epoch": 0.24792737300123552, "grad_norm": 1.3906711339950562, "learning_rate": 0.00017610929218199553, "loss": 1.6684, "step": 6923 }, { "epoch": 0.24796318513080381, "grad_norm": 1.3722862005233765, "learning_rate": 0.00017610176805307206, "loss": 1.411, "step": 6924 }, { "epoch": 0.24799899726037208, "grad_norm": 1.9369726181030273, "learning_rate": 0.00017609424290029746, "loss": 1.5838, "step": 6925 }, { "epoch": 0.24803480938994038, "grad_norm": 2.327025890350342, "learning_rate": 0.000176086716723773, "loss": 1.6616, "step": 6926 }, { "epoch": 0.24807062151950865, "grad_norm": 1.8831285238265991, "learning_rate": 0.0001760791895235999, "loss": 1.2508, "step": 6927 }, { "epoch": 0.24810643364907695, "grad_norm": 1.6104234457015991, "learning_rate": 0.00017607166129987944, "loss": 1.5941, "step": 6928 }, { "epoch": 0.24814224577864522, "grad_norm": 1.5727404356002808, "learning_rate": 0.0001760641320527129, "loss": 1.4856, "step": 6929 }, { "epoch": 0.2481780579082135, "grad_norm": 2.3073596954345703, "learning_rate": 0.00017605660178220158, "loss": 1.8495, "step": 6930 }, { "epoch": 0.2482138700377818, "grad_norm": 1.5391161441802979, "learning_rate": 0.0001760490704884468, "loss": 1.7637, "step": 6931 }, { "epoch": 0.24824968216735008, "grad_norm": 2.0064144134521484, "learning_rate": 0.00017604153817154985, "loss": 1.8597, "step": 6932 }, { "epoch": 0.24828549429691837, "grad_norm": 1.7063194513320923, "learning_rate": 0.00017603400483161212, "loss": 1.2984, "step": 6933 }, { "epoch": 0.24832130642648664, "grad_norm": 1.3737378120422363, "learning_rate": 0.0001760264704687349, "loss": 1.5487, "step": 6934 }, { "epoch": 0.24835711855605494, "grad_norm": 1.4802964925765991, "learning_rate": 0.00017601893508301962, "loss": 1.2851, "step": 6935 }, { "epoch": 0.2483929306856232, "grad_norm": 1.6147825717926025, "learning_rate": 0.0001760113986745676, "loss": 1.7134, "step": 6936 }, { "epoch": 0.2484287428151915, "grad_norm": 2.331864833831787, "learning_rate": 0.00017600386124348028, "loss": 1.3899, "step": 6937 }, { "epoch": 0.2484645549447598, "grad_norm": 1.2941458225250244, "learning_rate": 0.00017599632278985904, "loss": 1.5272, "step": 6938 }, { "epoch": 0.24850036707432807, "grad_norm": 1.85355544090271, "learning_rate": 0.00017598878331380528, "loss": 1.6488, "step": 6939 }, { "epoch": 0.24853617920389637, "grad_norm": 1.6847429275512695, "learning_rate": 0.0001759812428154205, "loss": 1.4094, "step": 6940 }, { "epoch": 0.24857199133346464, "grad_norm": 2.6703569889068604, "learning_rate": 0.00017597370129480606, "loss": 1.2444, "step": 6941 }, { "epoch": 0.24860780346303293, "grad_norm": 1.4878178834915161, "learning_rate": 0.00017596615875206347, "loss": 1.6358, "step": 6942 }, { "epoch": 0.2486436155926012, "grad_norm": 1.5205473899841309, "learning_rate": 0.00017595861518729424, "loss": 1.5765, "step": 6943 }, { "epoch": 0.2486794277221695, "grad_norm": 1.6328760385513306, "learning_rate": 0.00017595107060059984, "loss": 1.5, "step": 6944 }, { "epoch": 0.2487152398517378, "grad_norm": 1.4762791395187378, "learning_rate": 0.0001759435249920817, "loss": 1.9516, "step": 6945 }, { "epoch": 0.24875105198130606, "grad_norm": 1.660315752029419, "learning_rate": 0.0001759359783618414, "loss": 1.6335, "step": 6946 }, { "epoch": 0.24878686411087436, "grad_norm": 1.6185013055801392, "learning_rate": 0.00017592843070998049, "loss": 1.7045, "step": 6947 }, { "epoch": 0.24882267624044263, "grad_norm": 1.4820398092269897, "learning_rate": 0.00017592088203660045, "loss": 1.4543, "step": 6948 }, { "epoch": 0.24885848837001093, "grad_norm": 1.3168247938156128, "learning_rate": 0.00017591333234180293, "loss": 1.5607, "step": 6949 }, { "epoch": 0.2488943004995792, "grad_norm": 1.3358012437820435, "learning_rate": 0.0001759057816256894, "loss": 1.452, "step": 6950 }, { "epoch": 0.2489301126291475, "grad_norm": 2.118440866470337, "learning_rate": 0.00017589822988836148, "loss": 1.5474, "step": 6951 }, { "epoch": 0.2489659247587158, "grad_norm": 2.4116692543029785, "learning_rate": 0.00017589067712992082, "loss": 1.3814, "step": 6952 }, { "epoch": 0.24900173688828406, "grad_norm": 1.8422210216522217, "learning_rate": 0.00017588312335046897, "loss": 1.6097, "step": 6953 }, { "epoch": 0.24903754901785236, "grad_norm": 1.4137614965438843, "learning_rate": 0.00017587556855010755, "loss": 1.7229, "step": 6954 }, { "epoch": 0.24907336114742062, "grad_norm": 1.4702421426773071, "learning_rate": 0.00017586801272893827, "loss": 1.7011, "step": 6955 }, { "epoch": 0.24910917327698892, "grad_norm": 1.7273403406143188, "learning_rate": 0.00017586045588706273, "loss": 1.2071, "step": 6956 }, { "epoch": 0.2491449854065572, "grad_norm": 1.629349946975708, "learning_rate": 0.0001758528980245826, "loss": 1.6223, "step": 6957 }, { "epoch": 0.2491807975361255, "grad_norm": 1.4688459634780884, "learning_rate": 0.00017584533914159956, "loss": 1.2623, "step": 6958 }, { "epoch": 0.24921660966569378, "grad_norm": 1.8785773515701294, "learning_rate": 0.00017583777923821533, "loss": 1.5056, "step": 6959 }, { "epoch": 0.24925242179526205, "grad_norm": 1.7873069047927856, "learning_rate": 0.0001758302183145316, "loss": 1.5735, "step": 6960 }, { "epoch": 0.24928823392483035, "grad_norm": 1.8960673809051514, "learning_rate": 0.00017582265637065012, "loss": 1.7953, "step": 6961 }, { "epoch": 0.24932404605439862, "grad_norm": 1.3856395483016968, "learning_rate": 0.00017581509340667257, "loss": 1.5991, "step": 6962 }, { "epoch": 0.24935985818396691, "grad_norm": 1.5916454792022705, "learning_rate": 0.00017580752942270077, "loss": 1.7537, "step": 6963 }, { "epoch": 0.24939567031353518, "grad_norm": 1.445228099822998, "learning_rate": 0.0001757999644188364, "loss": 1.6487, "step": 6964 }, { "epoch": 0.24943148244310348, "grad_norm": 1.5743353366851807, "learning_rate": 0.0001757923983951813, "loss": 1.5899, "step": 6965 }, { "epoch": 0.24946729457267178, "grad_norm": 1.4328770637512207, "learning_rate": 0.00017578483135183726, "loss": 1.6986, "step": 6966 }, { "epoch": 0.24950310670224005, "grad_norm": 1.5256279706954956, "learning_rate": 0.00017577726328890604, "loss": 1.3874, "step": 6967 }, { "epoch": 0.24953891883180834, "grad_norm": 1.6930445432662964, "learning_rate": 0.0001757696942064895, "loss": 1.5721, "step": 6968 }, { "epoch": 0.2495747309613766, "grad_norm": 1.8914119005203247, "learning_rate": 0.00017576212410468949, "loss": 1.3895, "step": 6969 }, { "epoch": 0.2496105430909449, "grad_norm": 1.2605080604553223, "learning_rate": 0.00017575455298360782, "loss": 1.562, "step": 6970 }, { "epoch": 0.24964635522051318, "grad_norm": 1.4235647916793823, "learning_rate": 0.00017574698084334633, "loss": 1.6511, "step": 6971 }, { "epoch": 0.24968216735008147, "grad_norm": 1.5089948177337646, "learning_rate": 0.00017573940768400692, "loss": 1.2899, "step": 6972 }, { "epoch": 0.24971797947964977, "grad_norm": 1.1215814352035522, "learning_rate": 0.00017573183350569148, "loss": 1.2685, "step": 6973 }, { "epoch": 0.24975379160921804, "grad_norm": 1.6614004373550415, "learning_rate": 0.00017572425830850193, "loss": 1.4867, "step": 6974 }, { "epoch": 0.24978960373878634, "grad_norm": 1.3425636291503906, "learning_rate": 0.00017571668209254013, "loss": 1.6014, "step": 6975 }, { "epoch": 0.2498254158683546, "grad_norm": 1.719012975692749, "learning_rate": 0.00017570910485790805, "loss": 1.3348, "step": 6976 }, { "epoch": 0.2498612279979229, "grad_norm": 1.7479561567306519, "learning_rate": 0.00017570152660470765, "loss": 1.7141, "step": 6977 }, { "epoch": 0.24989704012749117, "grad_norm": 1.8728541135787964, "learning_rate": 0.00017569394733304083, "loss": 1.7683, "step": 6978 }, { "epoch": 0.24993285225705947, "grad_norm": 1.8570350408554077, "learning_rate": 0.00017568636704300958, "loss": 1.7784, "step": 6979 }, { "epoch": 0.24996866438662776, "grad_norm": 1.8610103130340576, "learning_rate": 0.0001756787857347159, "loss": 1.416, "step": 6980 }, { "epoch": 0.25000447651619606, "grad_norm": 1.5104469060897827, "learning_rate": 0.00017567120340826177, "loss": 1.6459, "step": 6981 }, { "epoch": 0.25004028864576433, "grad_norm": 1.4502947330474854, "learning_rate": 0.0001756636200637492, "loss": 1.3919, "step": 6982 }, { "epoch": 0.2500761007753326, "grad_norm": 1.8482967615127563, "learning_rate": 0.00017565603570128023, "loss": 1.5629, "step": 6983 }, { "epoch": 0.25011191290490087, "grad_norm": 1.9667346477508545, "learning_rate": 0.0001756484503209569, "loss": 1.6214, "step": 6984 }, { "epoch": 0.2501477250344692, "grad_norm": 1.8211997747421265, "learning_rate": 0.00017564086392288125, "loss": 1.8636, "step": 6985 }, { "epoch": 0.25018353716403746, "grad_norm": 1.3446124792099, "learning_rate": 0.00017563327650715535, "loss": 1.8394, "step": 6986 }, { "epoch": 0.25021934929360573, "grad_norm": 1.7953028678894043, "learning_rate": 0.00017562568807388126, "loss": 1.6372, "step": 6987 }, { "epoch": 0.25025516142317406, "grad_norm": 1.6460068225860596, "learning_rate": 0.0001756180986231611, "loss": 1.5185, "step": 6988 }, { "epoch": 0.2502909735527423, "grad_norm": 1.467795491218567, "learning_rate": 0.00017561050815509695, "loss": 1.5313, "step": 6989 }, { "epoch": 0.2503267856823106, "grad_norm": 1.5542267560958862, "learning_rate": 0.00017560291666979095, "loss": 1.1941, "step": 6990 }, { "epoch": 0.25036259781187886, "grad_norm": 1.5457571744918823, "learning_rate": 0.00017559532416734524, "loss": 1.7399, "step": 6991 }, { "epoch": 0.2503984099414472, "grad_norm": 1.6578049659729004, "learning_rate": 0.00017558773064786193, "loss": 1.4826, "step": 6992 }, { "epoch": 0.25043422207101546, "grad_norm": 2.136821746826172, "learning_rate": 0.0001755801361114432, "loss": 1.8302, "step": 6993 }, { "epoch": 0.2504700342005837, "grad_norm": 1.3996917009353638, "learning_rate": 0.00017557254055819126, "loss": 1.5399, "step": 6994 }, { "epoch": 0.25050584633015205, "grad_norm": 1.3739955425262451, "learning_rate": 0.00017556494398820823, "loss": 1.3365, "step": 6995 }, { "epoch": 0.2505416584597203, "grad_norm": 1.558883786201477, "learning_rate": 0.0001755573464015964, "loss": 1.6076, "step": 6996 }, { "epoch": 0.2505774705892886, "grad_norm": 1.6406570672988892, "learning_rate": 0.00017554974779845792, "loss": 1.8316, "step": 6997 }, { "epoch": 0.25061328271885686, "grad_norm": 1.7725262641906738, "learning_rate": 0.000175542148178895, "loss": 1.7184, "step": 6998 }, { "epoch": 0.2506490948484252, "grad_norm": 1.7781455516815186, "learning_rate": 0.00017553454754300996, "loss": 1.2366, "step": 6999 }, { "epoch": 0.25068490697799345, "grad_norm": 1.753017783164978, "learning_rate": 0.000175526945890905, "loss": 1.6132, "step": 7000 }, { "epoch": 0.2507207191075617, "grad_norm": 2.3922290802001953, "learning_rate": 0.0001755193432226824, "loss": 1.5879, "step": 7001 }, { "epoch": 0.25075653123713004, "grad_norm": 1.8872179985046387, "learning_rate": 0.00017551173953844445, "loss": 1.4223, "step": 7002 }, { "epoch": 0.2507923433666983, "grad_norm": 1.5126007795333862, "learning_rate": 0.00017550413483829344, "loss": 1.7051, "step": 7003 }, { "epoch": 0.2508281554962666, "grad_norm": 1.5426768064498901, "learning_rate": 0.0001754965291223317, "loss": 1.3291, "step": 7004 }, { "epoch": 0.25086396762583485, "grad_norm": 1.5832163095474243, "learning_rate": 0.00017548892239066156, "loss": 1.26, "step": 7005 }, { "epoch": 0.2508997797554032, "grad_norm": 1.4844779968261719, "learning_rate": 0.00017548131464338533, "loss": 1.697, "step": 7006 }, { "epoch": 0.25093559188497144, "grad_norm": 1.5662215948104858, "learning_rate": 0.00017547370588060537, "loss": 1.3258, "step": 7007 }, { "epoch": 0.2509714040145397, "grad_norm": 1.3159148693084717, "learning_rate": 0.00017546609610242405, "loss": 1.589, "step": 7008 }, { "epoch": 0.25100721614410804, "grad_norm": 1.476625919342041, "learning_rate": 0.00017545848530894377, "loss": 1.715, "step": 7009 }, { "epoch": 0.2510430282736763, "grad_norm": 1.64826238155365, "learning_rate": 0.0001754508735002669, "loss": 1.8173, "step": 7010 }, { "epoch": 0.2510788404032446, "grad_norm": 1.6059377193450928, "learning_rate": 0.00017544326067649583, "loss": 1.6255, "step": 7011 }, { "epoch": 0.25111465253281284, "grad_norm": 1.6135188341140747, "learning_rate": 0.00017543564683773302, "loss": 1.6396, "step": 7012 }, { "epoch": 0.25115046466238117, "grad_norm": 1.242232322692871, "learning_rate": 0.00017542803198408087, "loss": 1.6733, "step": 7013 }, { "epoch": 0.25118627679194944, "grad_norm": 2.01194167137146, "learning_rate": 0.00017542041611564186, "loss": 1.4317, "step": 7014 }, { "epoch": 0.2512220889215177, "grad_norm": 1.904948353767395, "learning_rate": 0.00017541279923251844, "loss": 1.6848, "step": 7015 }, { "epoch": 0.25125790105108603, "grad_norm": 1.638850212097168, "learning_rate": 0.00017540518133481308, "loss": 1.3428, "step": 7016 }, { "epoch": 0.2512937131806543, "grad_norm": 2.054460287094116, "learning_rate": 0.00017539756242262826, "loss": 1.5478, "step": 7017 }, { "epoch": 0.25132952531022257, "grad_norm": 1.5042341947555542, "learning_rate": 0.0001753899424960665, "loss": 1.6176, "step": 7018 }, { "epoch": 0.25136533743979084, "grad_norm": 1.8969051837921143, "learning_rate": 0.0001753823215552303, "loss": 1.3305, "step": 7019 }, { "epoch": 0.25140114956935916, "grad_norm": 1.6631965637207031, "learning_rate": 0.00017537469960022221, "loss": 1.7607, "step": 7020 }, { "epoch": 0.25143696169892743, "grad_norm": 1.4155299663543701, "learning_rate": 0.00017536707663114477, "loss": 1.4719, "step": 7021 }, { "epoch": 0.2514727738284957, "grad_norm": 1.9903786182403564, "learning_rate": 0.00017535945264810052, "loss": 1.5096, "step": 7022 }, { "epoch": 0.251508585958064, "grad_norm": 1.4660794734954834, "learning_rate": 0.00017535182765119204, "loss": 1.3804, "step": 7023 }, { "epoch": 0.2515443980876323, "grad_norm": 1.6268247365951538, "learning_rate": 0.00017534420164052193, "loss": 1.4592, "step": 7024 }, { "epoch": 0.25158021021720056, "grad_norm": 1.503402590751648, "learning_rate": 0.00017533657461619274, "loss": 1.4945, "step": 7025 }, { "epoch": 0.25161602234676883, "grad_norm": 1.4199435710906982, "learning_rate": 0.00017532894657830715, "loss": 1.7763, "step": 7026 }, { "epoch": 0.25165183447633716, "grad_norm": 1.6909997463226318, "learning_rate": 0.00017532131752696776, "loss": 1.546, "step": 7027 }, { "epoch": 0.2516876466059054, "grad_norm": 1.7875890731811523, "learning_rate": 0.00017531368746227718, "loss": 1.501, "step": 7028 }, { "epoch": 0.2517234587354737, "grad_norm": 2.4746861457824707, "learning_rate": 0.00017530605638433805, "loss": 1.576, "step": 7029 }, { "epoch": 0.251759270865042, "grad_norm": 1.6079216003417969, "learning_rate": 0.00017529842429325312, "loss": 1.7127, "step": 7030 }, { "epoch": 0.2517950829946103, "grad_norm": 1.7867952585220337, "learning_rate": 0.00017529079118912502, "loss": 1.1966, "step": 7031 }, { "epoch": 0.25183089512417856, "grad_norm": 1.4089163541793823, "learning_rate": 0.00017528315707205643, "loss": 1.7839, "step": 7032 }, { "epoch": 0.2518667072537468, "grad_norm": 1.644866704940796, "learning_rate": 0.00017527552194215005, "loss": 1.4532, "step": 7033 }, { "epoch": 0.25190251938331515, "grad_norm": 1.627656102180481, "learning_rate": 0.00017526788579950864, "loss": 1.7095, "step": 7034 }, { "epoch": 0.2519383315128834, "grad_norm": 1.9081600904464722, "learning_rate": 0.0001752602486442349, "loss": 1.6353, "step": 7035 }, { "epoch": 0.2519741436424517, "grad_norm": 1.579540729522705, "learning_rate": 0.0001752526104764316, "loss": 1.4417, "step": 7036 }, { "epoch": 0.25200995577201996, "grad_norm": 1.7195303440093994, "learning_rate": 0.0001752449712962015, "loss": 1.3465, "step": 7037 }, { "epoch": 0.2520457679015883, "grad_norm": 1.6513937711715698, "learning_rate": 0.00017523733110364736, "loss": 1.3911, "step": 7038 }, { "epoch": 0.25208158003115655, "grad_norm": 1.918748140335083, "learning_rate": 0.000175229689898872, "loss": 1.728, "step": 7039 }, { "epoch": 0.2521173921607248, "grad_norm": 1.678743600845337, "learning_rate": 0.00017522204768197818, "loss": 1.3992, "step": 7040 }, { "epoch": 0.25215320429029314, "grad_norm": 1.795161247253418, "learning_rate": 0.00017521440445306875, "loss": 1.6584, "step": 7041 }, { "epoch": 0.2521890164198614, "grad_norm": 1.783471941947937, "learning_rate": 0.00017520676021224652, "loss": 1.5853, "step": 7042 }, { "epoch": 0.2522248285494297, "grad_norm": 1.6953623294830322, "learning_rate": 0.00017519911495961435, "loss": 1.6603, "step": 7043 }, { "epoch": 0.25226064067899795, "grad_norm": 1.5812227725982666, "learning_rate": 0.0001751914686952751, "loss": 1.6276, "step": 7044 }, { "epoch": 0.2522964528085663, "grad_norm": 1.6534466743469238, "learning_rate": 0.0001751838214193316, "loss": 1.6614, "step": 7045 }, { "epoch": 0.25233226493813454, "grad_norm": 1.963584303855896, "learning_rate": 0.0001751761731318868, "loss": 1.3825, "step": 7046 }, { "epoch": 0.2523680770677028, "grad_norm": 1.7353453636169434, "learning_rate": 0.00017516852383304353, "loss": 1.3718, "step": 7047 }, { "epoch": 0.25240388919727114, "grad_norm": 1.7376201152801514, "learning_rate": 0.00017516087352290472, "loss": 1.6218, "step": 7048 }, { "epoch": 0.2524397013268394, "grad_norm": 1.3723000288009644, "learning_rate": 0.00017515322220157333, "loss": 1.234, "step": 7049 }, { "epoch": 0.2524755134564077, "grad_norm": 1.7133162021636963, "learning_rate": 0.0001751455698691523, "loss": 1.1578, "step": 7050 }, { "epoch": 0.25251132558597594, "grad_norm": 1.6886383295059204, "learning_rate": 0.00017513791652574453, "loss": 1.6405, "step": 7051 }, { "epoch": 0.25254713771554427, "grad_norm": 1.5092798471450806, "learning_rate": 0.00017513026217145302, "loss": 1.7148, "step": 7052 }, { "epoch": 0.25258294984511254, "grad_norm": 1.4195395708084106, "learning_rate": 0.00017512260680638072, "loss": 1.4879, "step": 7053 }, { "epoch": 0.2526187619746808, "grad_norm": 1.9386016130447388, "learning_rate": 0.00017511495043063066, "loss": 1.7046, "step": 7054 }, { "epoch": 0.25265457410424913, "grad_norm": 1.3876835107803345, "learning_rate": 0.00017510729304430584, "loss": 1.5976, "step": 7055 }, { "epoch": 0.2526903862338174, "grad_norm": 1.3243073225021362, "learning_rate": 0.00017509963464750928, "loss": 1.4122, "step": 7056 }, { "epoch": 0.25272619836338567, "grad_norm": 2.481052875518799, "learning_rate": 0.000175091975240344, "loss": 1.6062, "step": 7057 }, { "epoch": 0.25276201049295394, "grad_norm": 1.4827232360839844, "learning_rate": 0.00017508431482291304, "loss": 1.3834, "step": 7058 }, { "epoch": 0.25279782262252226, "grad_norm": 1.5816888809204102, "learning_rate": 0.0001750766533953195, "loss": 1.8041, "step": 7059 }, { "epoch": 0.25283363475209053, "grad_norm": 1.6099169254302979, "learning_rate": 0.00017506899095766641, "loss": 1.5866, "step": 7060 }, { "epoch": 0.2528694468816588, "grad_norm": 1.552196979522705, "learning_rate": 0.0001750613275100569, "loss": 1.3113, "step": 7061 }, { "epoch": 0.2529052590112271, "grad_norm": 1.621914029121399, "learning_rate": 0.00017505366305259402, "loss": 1.4599, "step": 7062 }, { "epoch": 0.2529410711407954, "grad_norm": 1.5911298990249634, "learning_rate": 0.00017504599758538095, "loss": 1.3794, "step": 7063 }, { "epoch": 0.25297688327036366, "grad_norm": 1.4210929870605469, "learning_rate": 0.00017503833110852078, "loss": 1.3875, "step": 7064 }, { "epoch": 0.25301269539993193, "grad_norm": 2.398237705230713, "learning_rate": 0.00017503066362211663, "loss": 1.6252, "step": 7065 }, { "epoch": 0.25304850752950026, "grad_norm": 1.9180196523666382, "learning_rate": 0.00017502299512627172, "loss": 1.6482, "step": 7066 }, { "epoch": 0.2530843196590685, "grad_norm": 1.9644347429275513, "learning_rate": 0.00017501532562108916, "loss": 1.3261, "step": 7067 }, { "epoch": 0.2531201317886368, "grad_norm": 1.9443340301513672, "learning_rate": 0.00017500765510667217, "loss": 1.5519, "step": 7068 }, { "epoch": 0.2531559439182051, "grad_norm": 2.6250624656677246, "learning_rate": 0.0001749999835831239, "loss": 1.7498, "step": 7069 }, { "epoch": 0.2531917560477734, "grad_norm": 1.7860009670257568, "learning_rate": 0.00017499231105054763, "loss": 1.4288, "step": 7070 }, { "epoch": 0.25322756817734166, "grad_norm": 1.8363738059997559, "learning_rate": 0.00017498463750904652, "loss": 1.1333, "step": 7071 }, { "epoch": 0.2532633803069099, "grad_norm": 1.4646892547607422, "learning_rate": 0.00017497696295872385, "loss": 1.7276, "step": 7072 }, { "epoch": 0.25329919243647825, "grad_norm": 1.5846657752990723, "learning_rate": 0.00017496928739968288, "loss": 1.693, "step": 7073 }, { "epoch": 0.2533350045660465, "grad_norm": 1.4653985500335693, "learning_rate": 0.0001749616108320268, "loss": 1.4689, "step": 7074 }, { "epoch": 0.2533708166956148, "grad_norm": 1.4848623275756836, "learning_rate": 0.000174953933255859, "loss": 1.4579, "step": 7075 }, { "epoch": 0.2534066288251831, "grad_norm": 1.226401448249817, "learning_rate": 0.0001749462546712827, "loss": 1.2865, "step": 7076 }, { "epoch": 0.2534424409547514, "grad_norm": 1.9527426958084106, "learning_rate": 0.00017493857507840116, "loss": 1.3185, "step": 7077 }, { "epoch": 0.25347825308431965, "grad_norm": 2.5343284606933594, "learning_rate": 0.0001749308944773178, "loss": 1.7735, "step": 7078 }, { "epoch": 0.2535140652138879, "grad_norm": 1.6038436889648438, "learning_rate": 0.0001749232128681359, "loss": 1.5201, "step": 7079 }, { "epoch": 0.25354987734345624, "grad_norm": 1.7659517526626587, "learning_rate": 0.00017491553025095882, "loss": 1.8174, "step": 7080 }, { "epoch": 0.2535856894730245, "grad_norm": 1.3074805736541748, "learning_rate": 0.00017490784662588992, "loss": 1.4389, "step": 7081 }, { "epoch": 0.2536215016025928, "grad_norm": 1.4077818393707275, "learning_rate": 0.00017490016199303256, "loss": 1.4818, "step": 7082 }, { "epoch": 0.2536573137321611, "grad_norm": 1.844086766242981, "learning_rate": 0.00017489247635249012, "loss": 1.7697, "step": 7083 }, { "epoch": 0.2536931258617294, "grad_norm": 1.78142249584198, "learning_rate": 0.00017488478970436604, "loss": 1.5957, "step": 7084 }, { "epoch": 0.25372893799129764, "grad_norm": 1.8254570960998535, "learning_rate": 0.0001748771020487637, "loss": 1.4769, "step": 7085 }, { "epoch": 0.2537647501208659, "grad_norm": 1.7204885482788086, "learning_rate": 0.00017486941338578653, "loss": 1.4355, "step": 7086 }, { "epoch": 0.25380056225043424, "grad_norm": 1.4895552396774292, "learning_rate": 0.000174861723715538, "loss": 1.407, "step": 7087 }, { "epoch": 0.2538363743800025, "grad_norm": 2.2478761672973633, "learning_rate": 0.0001748540330381215, "loss": 1.7522, "step": 7088 }, { "epoch": 0.2538721865095708, "grad_norm": 1.7592345476150513, "learning_rate": 0.00017484634135364057, "loss": 1.3917, "step": 7089 }, { "epoch": 0.2539079986391391, "grad_norm": 1.845581293106079, "learning_rate": 0.00017483864866219868, "loss": 1.4701, "step": 7090 }, { "epoch": 0.25394381076870737, "grad_norm": 1.259600043296814, "learning_rate": 0.00017483095496389928, "loss": 1.203, "step": 7091 }, { "epoch": 0.25397962289827564, "grad_norm": 1.8078480958938599, "learning_rate": 0.0001748232602588459, "loss": 1.5376, "step": 7092 }, { "epoch": 0.2540154350278439, "grad_norm": 1.4246379137039185, "learning_rate": 0.0001748155645471421, "loss": 1.7493, "step": 7093 }, { "epoch": 0.25405124715741223, "grad_norm": 1.7548315525054932, "learning_rate": 0.00017480786782889137, "loss": 1.3788, "step": 7094 }, { "epoch": 0.2540870592869805, "grad_norm": 1.760630488395691, "learning_rate": 0.00017480017010419724, "loss": 1.6053, "step": 7095 }, { "epoch": 0.25412287141654877, "grad_norm": 1.9188206195831299, "learning_rate": 0.00017479247137316335, "loss": 1.5271, "step": 7096 }, { "epoch": 0.2541586835461171, "grad_norm": 1.4333956241607666, "learning_rate": 0.0001747847716358932, "loss": 1.3216, "step": 7097 }, { "epoch": 0.25419449567568536, "grad_norm": 1.5908639430999756, "learning_rate": 0.00017477707089249043, "loss": 1.3639, "step": 7098 }, { "epoch": 0.25423030780525363, "grad_norm": 1.311259388923645, "learning_rate": 0.00017476936914305862, "loss": 1.6077, "step": 7099 }, { "epoch": 0.2542661199348219, "grad_norm": 1.6787432432174683, "learning_rate": 0.00017476166638770142, "loss": 1.6371, "step": 7100 }, { "epoch": 0.2543019320643902, "grad_norm": 1.8875246047973633, "learning_rate": 0.0001747539626265224, "loss": 1.5801, "step": 7101 }, { "epoch": 0.2543377441939585, "grad_norm": 2.0725271701812744, "learning_rate": 0.00017474625785962524, "loss": 1.6117, "step": 7102 }, { "epoch": 0.25437355632352676, "grad_norm": 2.2691195011138916, "learning_rate": 0.00017473855208711362, "loss": 1.6867, "step": 7103 }, { "epoch": 0.2544093684530951, "grad_norm": 1.4735385179519653, "learning_rate": 0.00017473084530909117, "loss": 1.5192, "step": 7104 }, { "epoch": 0.25444518058266336, "grad_norm": 1.6299970149993896, "learning_rate": 0.0001747231375256616, "loss": 1.74, "step": 7105 }, { "epoch": 0.2544809927122316, "grad_norm": 1.8156439065933228, "learning_rate": 0.00017471542873692862, "loss": 1.6018, "step": 7106 }, { "epoch": 0.2545168048417999, "grad_norm": 1.7628570795059204, "learning_rate": 0.0001747077189429959, "loss": 1.3708, "step": 7107 }, { "epoch": 0.2545526169713682, "grad_norm": 1.5150426626205444, "learning_rate": 0.00017470000814396718, "loss": 1.7994, "step": 7108 }, { "epoch": 0.2545884291009365, "grad_norm": 2.202345371246338, "learning_rate": 0.0001746922963399462, "loss": 1.3141, "step": 7109 }, { "epoch": 0.25462424123050476, "grad_norm": 1.6175556182861328, "learning_rate": 0.00017468458353103676, "loss": 1.4706, "step": 7110 }, { "epoch": 0.2546600533600731, "grad_norm": 1.6333051919937134, "learning_rate": 0.00017467686971734257, "loss": 1.621, "step": 7111 }, { "epoch": 0.25469586548964135, "grad_norm": 1.4622938632965088, "learning_rate": 0.0001746691548989674, "loss": 1.1874, "step": 7112 }, { "epoch": 0.2547316776192096, "grad_norm": 1.556118369102478, "learning_rate": 0.00017466143907601508, "loss": 1.5941, "step": 7113 }, { "epoch": 0.2547674897487779, "grad_norm": 1.436787724494934, "learning_rate": 0.00017465372224858937, "loss": 1.5353, "step": 7114 }, { "epoch": 0.2548033018783462, "grad_norm": 1.2040411233901978, "learning_rate": 0.00017464600441679417, "loss": 1.5374, "step": 7115 }, { "epoch": 0.2548391140079145, "grad_norm": 1.6581120491027832, "learning_rate": 0.0001746382855807333, "loss": 1.5132, "step": 7116 }, { "epoch": 0.25487492613748275, "grad_norm": 1.7110474109649658, "learning_rate": 0.0001746305657405105, "loss": 1.5188, "step": 7117 }, { "epoch": 0.2549107382670511, "grad_norm": 1.6720296144485474, "learning_rate": 0.00017462284489622973, "loss": 1.4921, "step": 7118 }, { "epoch": 0.25494655039661934, "grad_norm": 1.5299689769744873, "learning_rate": 0.00017461512304799484, "loss": 1.4806, "step": 7119 }, { "epoch": 0.2549823625261876, "grad_norm": 1.5790822505950928, "learning_rate": 0.0001746074001959097, "loss": 1.2113, "step": 7120 }, { "epoch": 0.2550181746557559, "grad_norm": 2.029690980911255, "learning_rate": 0.00017459967634007826, "loss": 1.6939, "step": 7121 }, { "epoch": 0.2550539867853242, "grad_norm": 1.6915394067764282, "learning_rate": 0.00017459195148060438, "loss": 1.2217, "step": 7122 }, { "epoch": 0.2550897989148925, "grad_norm": 2.6420986652374268, "learning_rate": 0.00017458422561759203, "loss": 2.0125, "step": 7123 }, { "epoch": 0.25512561104446074, "grad_norm": 1.7383835315704346, "learning_rate": 0.0001745764987511451, "loss": 1.2076, "step": 7124 }, { "epoch": 0.25516142317402907, "grad_norm": 1.5973914861679077, "learning_rate": 0.0001745687708813676, "loss": 1.5747, "step": 7125 }, { "epoch": 0.25519723530359734, "grad_norm": 1.8514518737792969, "learning_rate": 0.00017456104200836347, "loss": 1.3596, "step": 7126 }, { "epoch": 0.2552330474331656, "grad_norm": 1.7249946594238281, "learning_rate": 0.00017455331213223668, "loss": 1.7315, "step": 7127 }, { "epoch": 0.2552688595627339, "grad_norm": 1.592330813407898, "learning_rate": 0.00017454558125309125, "loss": 1.4371, "step": 7128 }, { "epoch": 0.2553046716923022, "grad_norm": 4.213657379150391, "learning_rate": 0.00017453784937103122, "loss": 1.464, "step": 7129 }, { "epoch": 0.25534048382187047, "grad_norm": 1.4696651697158813, "learning_rate": 0.00017453011648616053, "loss": 1.4912, "step": 7130 }, { "epoch": 0.25537629595143874, "grad_norm": 1.5843778848648071, "learning_rate": 0.00017452238259858327, "loss": 1.6334, "step": 7131 }, { "epoch": 0.25541210808100706, "grad_norm": 1.2764482498168945, "learning_rate": 0.00017451464770840348, "loss": 1.5292, "step": 7132 }, { "epoch": 0.25544792021057533, "grad_norm": 1.7480367422103882, "learning_rate": 0.00017450691181572522, "loss": 1.387, "step": 7133 }, { "epoch": 0.2554837323401436, "grad_norm": 2.2001123428344727, "learning_rate": 0.00017449917492065256, "loss": 1.4539, "step": 7134 }, { "epoch": 0.25551954446971187, "grad_norm": 1.604820728302002, "learning_rate": 0.0001744914370232896, "loss": 1.5517, "step": 7135 }, { "epoch": 0.2555553565992802, "grad_norm": 1.7131909132003784, "learning_rate": 0.00017448369812374045, "loss": 1.4457, "step": 7136 }, { "epoch": 0.25559116872884846, "grad_norm": 1.6699351072311401, "learning_rate": 0.00017447595822210924, "loss": 1.2495, "step": 7137 }, { "epoch": 0.25562698085841673, "grad_norm": 1.6325026750564575, "learning_rate": 0.00017446821731850008, "loss": 1.4587, "step": 7138 }, { "epoch": 0.25566279298798505, "grad_norm": 1.5442523956298828, "learning_rate": 0.00017446047541301707, "loss": 1.4748, "step": 7139 }, { "epoch": 0.2556986051175533, "grad_norm": 2.9096853733062744, "learning_rate": 0.00017445273250576442, "loss": 1.6572, "step": 7140 }, { "epoch": 0.2557344172471216, "grad_norm": 1.796773910522461, "learning_rate": 0.0001744449885968463, "loss": 1.4552, "step": 7141 }, { "epoch": 0.25577022937668986, "grad_norm": 2.095973253250122, "learning_rate": 0.00017443724368636693, "loss": 1.4722, "step": 7142 }, { "epoch": 0.2558060415062582, "grad_norm": 1.7989848852157593, "learning_rate": 0.00017442949777443038, "loss": 1.7267, "step": 7143 }, { "epoch": 0.25584185363582646, "grad_norm": 1.8527088165283203, "learning_rate": 0.000174421750861141, "loss": 1.5778, "step": 7144 }, { "epoch": 0.2558776657653947, "grad_norm": 1.6256322860717773, "learning_rate": 0.00017441400294660294, "loss": 1.6681, "step": 7145 }, { "epoch": 0.25591347789496305, "grad_norm": 1.3841743469238281, "learning_rate": 0.00017440625403092045, "loss": 1.5331, "step": 7146 }, { "epoch": 0.2559492900245313, "grad_norm": 1.9727689027786255, "learning_rate": 0.00017439850411419782, "loss": 1.5752, "step": 7147 }, { "epoch": 0.2559851021540996, "grad_norm": 1.6270530223846436, "learning_rate": 0.00017439075319653928, "loss": 1.7994, "step": 7148 }, { "epoch": 0.25602091428366786, "grad_norm": 1.7818320989608765, "learning_rate": 0.0001743830012780491, "loss": 1.6047, "step": 7149 }, { "epoch": 0.2560567264132362, "grad_norm": 1.760941505432129, "learning_rate": 0.00017437524835883157, "loss": 1.6809, "step": 7150 }, { "epoch": 0.25609253854280445, "grad_norm": 1.2127779722213745, "learning_rate": 0.00017436749443899103, "loss": 1.6395, "step": 7151 }, { "epoch": 0.2561283506723727, "grad_norm": 2.064774751663208, "learning_rate": 0.00017435973951863179, "loss": 1.5165, "step": 7152 }, { "epoch": 0.25616416280194104, "grad_norm": 2.9734225273132324, "learning_rate": 0.0001743519835978581, "loss": 1.3607, "step": 7153 }, { "epoch": 0.2561999749315093, "grad_norm": 1.5963677167892456, "learning_rate": 0.00017434422667677446, "loss": 1.6676, "step": 7154 }, { "epoch": 0.2562357870610776, "grad_norm": 1.5437045097351074, "learning_rate": 0.00017433646875548512, "loss": 1.583, "step": 7155 }, { "epoch": 0.25627159919064585, "grad_norm": 1.6270062923431396, "learning_rate": 0.0001743287098340945, "loss": 1.4786, "step": 7156 }, { "epoch": 0.2563074113202142, "grad_norm": 1.7923583984375, "learning_rate": 0.00017432094991270692, "loss": 1.6218, "step": 7157 }, { "epoch": 0.25634322344978244, "grad_norm": 2.1703848838806152, "learning_rate": 0.00017431318899142686, "loss": 2.0572, "step": 7158 }, { "epoch": 0.2563790355793507, "grad_norm": 1.547512173652649, "learning_rate": 0.0001743054270703587, "loss": 1.4179, "step": 7159 }, { "epoch": 0.25641484770891904, "grad_norm": 1.7714463472366333, "learning_rate": 0.00017429766414960685, "loss": 1.5927, "step": 7160 }, { "epoch": 0.2564506598384873, "grad_norm": 2.190034866333008, "learning_rate": 0.0001742899002292758, "loss": 1.8858, "step": 7161 }, { "epoch": 0.2564864719680556, "grad_norm": 1.3032276630401611, "learning_rate": 0.00017428213530946995, "loss": 1.6218, "step": 7162 }, { "epoch": 0.25652228409762384, "grad_norm": 1.72031569480896, "learning_rate": 0.00017427436939029378, "loss": 1.6231, "step": 7163 }, { "epoch": 0.25655809622719217, "grad_norm": 1.9104535579681396, "learning_rate": 0.00017426660247185177, "loss": 1.4184, "step": 7164 }, { "epoch": 0.25659390835676044, "grad_norm": 1.665347933769226, "learning_rate": 0.0001742588345542484, "loss": 1.9099, "step": 7165 }, { "epoch": 0.2566297204863287, "grad_norm": 1.2468254566192627, "learning_rate": 0.00017425106563758824, "loss": 1.458, "step": 7166 }, { "epoch": 0.25666553261589703, "grad_norm": 2.2506954669952393, "learning_rate": 0.00017424329572197578, "loss": 1.6085, "step": 7167 }, { "epoch": 0.2567013447454653, "grad_norm": 1.5501458644866943, "learning_rate": 0.0001742355248075155, "loss": 1.4801, "step": 7168 }, { "epoch": 0.25673715687503357, "grad_norm": 1.3393751382827759, "learning_rate": 0.00017422775289431202, "loss": 1.4539, "step": 7169 }, { "epoch": 0.25677296900460184, "grad_norm": 1.88332200050354, "learning_rate": 0.00017421997998246985, "loss": 1.6451, "step": 7170 }, { "epoch": 0.25680878113417016, "grad_norm": 2.4323949813842773, "learning_rate": 0.0001742122060720936, "loss": 1.7289, "step": 7171 }, { "epoch": 0.25684459326373843, "grad_norm": 2.0443127155303955, "learning_rate": 0.00017420443116328784, "loss": 1.3284, "step": 7172 }, { "epoch": 0.2568804053933067, "grad_norm": 1.3344660997390747, "learning_rate": 0.0001741966552561572, "loss": 1.3553, "step": 7173 }, { "epoch": 0.256916217522875, "grad_norm": 1.9592489004135132, "learning_rate": 0.00017418887835080624, "loss": 1.3423, "step": 7174 }, { "epoch": 0.2569520296524433, "grad_norm": 1.8439921140670776, "learning_rate": 0.0001741811004473396, "loss": 1.4173, "step": 7175 }, { "epoch": 0.25698784178201156, "grad_norm": 2.6069836616516113, "learning_rate": 0.000174173321545862, "loss": 1.42, "step": 7176 }, { "epoch": 0.25702365391157983, "grad_norm": 1.5937824249267578, "learning_rate": 0.000174165541646478, "loss": 1.5705, "step": 7177 }, { "epoch": 0.25705946604114815, "grad_norm": 1.4017707109451294, "learning_rate": 0.0001741577607492923, "loss": 1.4923, "step": 7178 }, { "epoch": 0.2570952781707164, "grad_norm": 1.6453132629394531, "learning_rate": 0.00017414997885440957, "loss": 1.6204, "step": 7179 }, { "epoch": 0.2571310903002847, "grad_norm": 1.494167685508728, "learning_rate": 0.00017414219596193455, "loss": 1.2734, "step": 7180 }, { "epoch": 0.257166902429853, "grad_norm": 1.562565565109253, "learning_rate": 0.0001741344120719719, "loss": 1.327, "step": 7181 }, { "epoch": 0.2572027145594213, "grad_norm": 1.6081397533416748, "learning_rate": 0.00017412662718462637, "loss": 1.546, "step": 7182 }, { "epoch": 0.25723852668898955, "grad_norm": 1.5741753578186035, "learning_rate": 0.00017411884130000271, "loss": 1.6006, "step": 7183 }, { "epoch": 0.2572743388185578, "grad_norm": 2.1187093257904053, "learning_rate": 0.00017411105441820563, "loss": 1.5594, "step": 7184 }, { "epoch": 0.25731015094812615, "grad_norm": 1.4211379289627075, "learning_rate": 0.0001741032665393399, "loss": 1.6873, "step": 7185 }, { "epoch": 0.2573459630776944, "grad_norm": 1.706182837486267, "learning_rate": 0.00017409547766351034, "loss": 1.3723, "step": 7186 }, { "epoch": 0.2573817752072627, "grad_norm": 1.8475958108901978, "learning_rate": 0.00017408768779082165, "loss": 1.4974, "step": 7187 }, { "epoch": 0.257417587336831, "grad_norm": 1.5614365339279175, "learning_rate": 0.00017407989692137872, "loss": 1.5942, "step": 7188 }, { "epoch": 0.2574533994663993, "grad_norm": 2.806450843811035, "learning_rate": 0.0001740721050552863, "loss": 1.6668, "step": 7189 }, { "epoch": 0.25748921159596755, "grad_norm": 2.1845903396606445, "learning_rate": 0.0001740643121926493, "loss": 1.4023, "step": 7190 }, { "epoch": 0.2575250237255358, "grad_norm": 1.9280097484588623, "learning_rate": 0.0001740565183335725, "loss": 1.4899, "step": 7191 }, { "epoch": 0.25756083585510414, "grad_norm": 1.400450348854065, "learning_rate": 0.00017404872347816076, "loss": 1.2537, "step": 7192 }, { "epoch": 0.2575966479846724, "grad_norm": 2.4562292098999023, "learning_rate": 0.00017404092762651898, "loss": 1.501, "step": 7193 }, { "epoch": 0.2576324601142407, "grad_norm": 2.2671611309051514, "learning_rate": 0.000174033130778752, "loss": 1.4855, "step": 7194 }, { "epoch": 0.257668272243809, "grad_norm": 1.6012718677520752, "learning_rate": 0.00017402533293496477, "loss": 1.4119, "step": 7195 }, { "epoch": 0.2577040843733773, "grad_norm": 1.4546482563018799, "learning_rate": 0.00017401753409526216, "loss": 1.6494, "step": 7196 }, { "epoch": 0.25773989650294554, "grad_norm": 2.1474809646606445, "learning_rate": 0.0001740097342597491, "loss": 1.6661, "step": 7197 }, { "epoch": 0.2577757086325138, "grad_norm": 1.6500005722045898, "learning_rate": 0.0001740019334285305, "loss": 1.4682, "step": 7198 }, { "epoch": 0.25781152076208214, "grad_norm": 1.741763710975647, "learning_rate": 0.0001739941316017114, "loss": 1.4744, "step": 7199 }, { "epoch": 0.2578473328916504, "grad_norm": 2.0389153957366943, "learning_rate": 0.00017398632877939666, "loss": 1.584, "step": 7200 }, { "epoch": 0.2578831450212187, "grad_norm": 1.478593111038208, "learning_rate": 0.00017397852496169134, "loss": 1.4449, "step": 7201 }, { "epoch": 0.257918957150787, "grad_norm": 2.4067537784576416, "learning_rate": 0.00017397072014870037, "loss": 1.3542, "step": 7202 }, { "epoch": 0.25795476928035527, "grad_norm": 2.244966506958008, "learning_rate": 0.00017396291434052877, "loss": 1.4444, "step": 7203 }, { "epoch": 0.25799058140992354, "grad_norm": 1.5638600587844849, "learning_rate": 0.00017395510753728157, "loss": 1.441, "step": 7204 }, { "epoch": 0.2580263935394918, "grad_norm": 1.8143665790557861, "learning_rate": 0.0001739472997390638, "loss": 1.7743, "step": 7205 }, { "epoch": 0.25806220566906013, "grad_norm": 1.868913173675537, "learning_rate": 0.00017393949094598047, "loss": 1.3007, "step": 7206 }, { "epoch": 0.2580980177986284, "grad_norm": 1.5457756519317627, "learning_rate": 0.00017393168115813673, "loss": 1.6073, "step": 7207 }, { "epoch": 0.25813382992819667, "grad_norm": 1.3162868022918701, "learning_rate": 0.0001739238703756375, "loss": 1.5155, "step": 7208 }, { "epoch": 0.258169642057765, "grad_norm": 1.955180287361145, "learning_rate": 0.00017391605859858798, "loss": 1.5373, "step": 7209 }, { "epoch": 0.25820545418733326, "grad_norm": 1.7129416465759277, "learning_rate": 0.00017390824582709326, "loss": 1.8984, "step": 7210 }, { "epoch": 0.25824126631690153, "grad_norm": 1.3623757362365723, "learning_rate": 0.0001739004320612584, "loss": 1.5941, "step": 7211 }, { "epoch": 0.2582770784464698, "grad_norm": 1.5919731855392456, "learning_rate": 0.00017389261730118858, "loss": 1.6802, "step": 7212 }, { "epoch": 0.2583128905760381, "grad_norm": 1.5978248119354248, "learning_rate": 0.0001738848015469889, "loss": 1.4566, "step": 7213 }, { "epoch": 0.2583487027056064, "grad_norm": 2.2260446548461914, "learning_rate": 0.0001738769847987645, "loss": 1.2414, "step": 7214 }, { "epoch": 0.25838451483517466, "grad_norm": 1.7300844192504883, "learning_rate": 0.0001738691670566206, "loss": 1.3088, "step": 7215 }, { "epoch": 0.258420326964743, "grad_norm": 1.403152346611023, "learning_rate": 0.0001738613483206623, "loss": 1.607, "step": 7216 }, { "epoch": 0.25845613909431125, "grad_norm": 1.4186724424362183, "learning_rate": 0.00017385352859099483, "loss": 1.6773, "step": 7217 }, { "epoch": 0.2584919512238795, "grad_norm": 1.7511612176895142, "learning_rate": 0.00017384570786772345, "loss": 1.8717, "step": 7218 }, { "epoch": 0.2585277633534478, "grad_norm": 2.500760793685913, "learning_rate": 0.00017383788615095327, "loss": 1.6079, "step": 7219 }, { "epoch": 0.2585635754830161, "grad_norm": 1.5331605672836304, "learning_rate": 0.0001738300634407896, "loss": 1.4821, "step": 7220 }, { "epoch": 0.2585993876125844, "grad_norm": 1.4042764902114868, "learning_rate": 0.00017382223973733767, "loss": 1.5336, "step": 7221 }, { "epoch": 0.25863519974215265, "grad_norm": 1.2958499193191528, "learning_rate": 0.0001738144150407027, "loss": 1.5881, "step": 7222 }, { "epoch": 0.258671011871721, "grad_norm": 1.417944073677063, "learning_rate": 0.00017380658935099, "loss": 1.6748, "step": 7223 }, { "epoch": 0.25870682400128925, "grad_norm": 1.7564271688461304, "learning_rate": 0.00017379876266830486, "loss": 1.5369, "step": 7224 }, { "epoch": 0.2587426361308575, "grad_norm": 1.4452077150344849, "learning_rate": 0.00017379093499275258, "loss": 1.6696, "step": 7225 }, { "epoch": 0.2587784482604258, "grad_norm": 1.0980790853500366, "learning_rate": 0.00017378310632443843, "loss": 1.2389, "step": 7226 }, { "epoch": 0.2588142603899941, "grad_norm": 1.6883370876312256, "learning_rate": 0.00017377527666346772, "loss": 1.6746, "step": 7227 }, { "epoch": 0.2588500725195624, "grad_norm": 1.3964970111846924, "learning_rate": 0.00017376744600994587, "loss": 1.6275, "step": 7228 }, { "epoch": 0.25888588464913065, "grad_norm": 2.0311527252197266, "learning_rate": 0.00017375961436397818, "loss": 1.3165, "step": 7229 }, { "epoch": 0.2589216967786989, "grad_norm": 1.7014087438583374, "learning_rate": 0.00017375178172567002, "loss": 1.5707, "step": 7230 }, { "epoch": 0.25895750890826724, "grad_norm": 1.977447748184204, "learning_rate": 0.00017374394809512676, "loss": 1.5756, "step": 7231 }, { "epoch": 0.2589933210378355, "grad_norm": 1.3852441310882568, "learning_rate": 0.0001737361134724538, "loss": 1.4648, "step": 7232 }, { "epoch": 0.2590291331674038, "grad_norm": 1.8260564804077148, "learning_rate": 0.00017372827785775655, "loss": 1.8661, "step": 7233 }, { "epoch": 0.2590649452969721, "grad_norm": 1.7901363372802734, "learning_rate": 0.00017372044125114045, "loss": 1.3408, "step": 7234 }, { "epoch": 0.2591007574265404, "grad_norm": 2.0273873805999756, "learning_rate": 0.0001737126036527109, "loss": 1.5693, "step": 7235 }, { "epoch": 0.25913656955610864, "grad_norm": 1.6030428409576416, "learning_rate": 0.00017370476506257333, "loss": 1.444, "step": 7236 }, { "epoch": 0.2591723816856769, "grad_norm": 1.4109402894973755, "learning_rate": 0.0001736969254808332, "loss": 1.3457, "step": 7237 }, { "epoch": 0.25920819381524524, "grad_norm": 2.0388617515563965, "learning_rate": 0.00017368908490759605, "loss": 1.7262, "step": 7238 }, { "epoch": 0.2592440059448135, "grad_norm": 2.0132298469543457, "learning_rate": 0.00017368124334296727, "loss": 1.3662, "step": 7239 }, { "epoch": 0.2592798180743818, "grad_norm": 1.9762897491455078, "learning_rate": 0.00017367340078705242, "loss": 1.5578, "step": 7240 }, { "epoch": 0.2593156302039501, "grad_norm": 2.113196611404419, "learning_rate": 0.000173665557239957, "loss": 1.7907, "step": 7241 }, { "epoch": 0.25935144233351837, "grad_norm": 2.036367416381836, "learning_rate": 0.00017365771270178652, "loss": 1.6257, "step": 7242 }, { "epoch": 0.25938725446308664, "grad_norm": 1.8975567817687988, "learning_rate": 0.00017364986717264652, "loss": 1.7479, "step": 7243 }, { "epoch": 0.2594230665926549, "grad_norm": 1.5035462379455566, "learning_rate": 0.00017364202065264258, "loss": 1.7363, "step": 7244 }, { "epoch": 0.25945887872222323, "grad_norm": 2.175387382507324, "learning_rate": 0.00017363417314188024, "loss": 1.8906, "step": 7245 }, { "epoch": 0.2594946908517915, "grad_norm": 1.3993414640426636, "learning_rate": 0.00017362632464046506, "loss": 1.5836, "step": 7246 }, { "epoch": 0.25953050298135977, "grad_norm": 1.9026535749435425, "learning_rate": 0.00017361847514850266, "loss": 1.5866, "step": 7247 }, { "epoch": 0.2595663151109281, "grad_norm": 2.123619318008423, "learning_rate": 0.00017361062466609867, "loss": 1.1663, "step": 7248 }, { "epoch": 0.25960212724049636, "grad_norm": 1.8966612815856934, "learning_rate": 0.00017360277319335865, "loss": 1.5483, "step": 7249 }, { "epoch": 0.25963793937006463, "grad_norm": 1.919511079788208, "learning_rate": 0.00017359492073038826, "loss": 1.7607, "step": 7250 }, { "epoch": 0.2596737514996329, "grad_norm": 1.6821178197860718, "learning_rate": 0.00017358706727729311, "loss": 1.3796, "step": 7251 }, { "epoch": 0.2597095636292012, "grad_norm": 1.4946900606155396, "learning_rate": 0.00017357921283417892, "loss": 1.7299, "step": 7252 }, { "epoch": 0.2597453757587695, "grad_norm": 1.5954385995864868, "learning_rate": 0.00017357135740115137, "loss": 1.4342, "step": 7253 }, { "epoch": 0.25978118788833776, "grad_norm": 1.8522142171859741, "learning_rate": 0.00017356350097831605, "loss": 1.4027, "step": 7254 }, { "epoch": 0.2598170000179061, "grad_norm": 1.701069712638855, "learning_rate": 0.00017355564356577873, "loss": 1.6356, "step": 7255 }, { "epoch": 0.25985281214747435, "grad_norm": 2.1512773036956787, "learning_rate": 0.00017354778516364512, "loss": 1.4857, "step": 7256 }, { "epoch": 0.2598886242770426, "grad_norm": 2.0113370418548584, "learning_rate": 0.00017353992577202093, "loss": 1.7957, "step": 7257 }, { "epoch": 0.2599244364066109, "grad_norm": 1.7597367763519287, "learning_rate": 0.00017353206539101186, "loss": 1.4799, "step": 7258 }, { "epoch": 0.2599602485361792, "grad_norm": 1.4501522779464722, "learning_rate": 0.00017352420402072375, "loss": 1.4439, "step": 7259 }, { "epoch": 0.2599960606657475, "grad_norm": 1.3950390815734863, "learning_rate": 0.00017351634166126227, "loss": 1.6883, "step": 7260 }, { "epoch": 0.26003187279531575, "grad_norm": 1.6042085886001587, "learning_rate": 0.00017350847831273329, "loss": 1.7091, "step": 7261 }, { "epoch": 0.2600676849248841, "grad_norm": 1.632921814918518, "learning_rate": 0.00017350061397524252, "loss": 1.541, "step": 7262 }, { "epoch": 0.26010349705445235, "grad_norm": 1.503475546836853, "learning_rate": 0.0001734927486488958, "loss": 1.5553, "step": 7263 }, { "epoch": 0.2601393091840206, "grad_norm": 1.7809571027755737, "learning_rate": 0.00017348488233379897, "loss": 1.8013, "step": 7264 }, { "epoch": 0.2601751213135889, "grad_norm": 1.4528809785842896, "learning_rate": 0.0001734770150300578, "loss": 1.5241, "step": 7265 }, { "epoch": 0.2602109334431572, "grad_norm": 1.5344305038452148, "learning_rate": 0.00017346914673777822, "loss": 1.7202, "step": 7266 }, { "epoch": 0.2602467455727255, "grad_norm": 1.6437627077102661, "learning_rate": 0.000173461277457066, "loss": 1.4926, "step": 7267 }, { "epoch": 0.26028255770229375, "grad_norm": 1.5676155090332031, "learning_rate": 0.00017345340718802704, "loss": 1.6236, "step": 7268 }, { "epoch": 0.2603183698318621, "grad_norm": 2.03763747215271, "learning_rate": 0.00017344553593076726, "loss": 1.3897, "step": 7269 }, { "epoch": 0.26035418196143034, "grad_norm": 2.440800666809082, "learning_rate": 0.00017343766368539253, "loss": 1.7987, "step": 7270 }, { "epoch": 0.2603899940909986, "grad_norm": 1.6920621395111084, "learning_rate": 0.00017342979045200876, "loss": 1.8625, "step": 7271 }, { "epoch": 0.2604258062205669, "grad_norm": 1.7777220010757446, "learning_rate": 0.00017342191623072187, "loss": 1.375, "step": 7272 }, { "epoch": 0.2604616183501352, "grad_norm": 1.4054484367370605, "learning_rate": 0.00017341404102163782, "loss": 1.64, "step": 7273 }, { "epoch": 0.2604974304797035, "grad_norm": 2.3189139366149902, "learning_rate": 0.00017340616482486253, "loss": 1.6583, "step": 7274 }, { "epoch": 0.26053324260927174, "grad_norm": 1.4085335731506348, "learning_rate": 0.00017339828764050198, "loss": 1.3233, "step": 7275 }, { "epoch": 0.26056905473884007, "grad_norm": 1.6071890592575073, "learning_rate": 0.00017339040946866217, "loss": 1.6617, "step": 7276 }, { "epoch": 0.26060486686840834, "grad_norm": 1.9245442152023315, "learning_rate": 0.00017338253030944905, "loss": 1.7055, "step": 7277 }, { "epoch": 0.2606406789979766, "grad_norm": 2.0860061645507812, "learning_rate": 0.00017337465016296864, "loss": 1.5268, "step": 7278 }, { "epoch": 0.2606764911275449, "grad_norm": 1.7437604665756226, "learning_rate": 0.00017336676902932695, "loss": 1.4931, "step": 7279 }, { "epoch": 0.2607123032571132, "grad_norm": 2.2297158241271973, "learning_rate": 0.00017335888690863, "loss": 1.4163, "step": 7280 }, { "epoch": 0.26074811538668147, "grad_norm": 1.6739795207977295, "learning_rate": 0.00017335100380098392, "loss": 1.6585, "step": 7281 }, { "epoch": 0.26078392751624974, "grad_norm": 1.6758095026016235, "learning_rate": 0.00017334311970649465, "loss": 1.5066, "step": 7282 }, { "epoch": 0.26081973964581806, "grad_norm": 1.510539174079895, "learning_rate": 0.00017333523462526832, "loss": 1.5937, "step": 7283 }, { "epoch": 0.26085555177538633, "grad_norm": 2.0457797050476074, "learning_rate": 0.000173327348557411, "loss": 1.6134, "step": 7284 }, { "epoch": 0.2608913639049546, "grad_norm": 1.2520917654037476, "learning_rate": 0.00017331946150302878, "loss": 1.4669, "step": 7285 }, { "epoch": 0.26092717603452287, "grad_norm": 1.4760613441467285, "learning_rate": 0.00017331157346222779, "loss": 1.3615, "step": 7286 }, { "epoch": 0.2609629881640912, "grad_norm": 1.5453470945358276, "learning_rate": 0.00017330368443511417, "loss": 1.6023, "step": 7287 }, { "epoch": 0.26099880029365946, "grad_norm": 1.4087209701538086, "learning_rate": 0.00017329579442179401, "loss": 1.7286, "step": 7288 }, { "epoch": 0.26103461242322773, "grad_norm": 1.5778745412826538, "learning_rate": 0.00017328790342237347, "loss": 1.4194, "step": 7289 }, { "epoch": 0.26107042455279605, "grad_norm": 1.4336007833480835, "learning_rate": 0.00017328001143695874, "loss": 1.2318, "step": 7290 }, { "epoch": 0.2611062366823643, "grad_norm": 1.543596625328064, "learning_rate": 0.00017327211846565596, "loss": 1.5086, "step": 7291 }, { "epoch": 0.2611420488119326, "grad_norm": 1.4977967739105225, "learning_rate": 0.0001732642245085714, "loss": 1.5653, "step": 7292 }, { "epoch": 0.26117786094150086, "grad_norm": 1.346598744392395, "learning_rate": 0.00017325632956581113, "loss": 1.5548, "step": 7293 }, { "epoch": 0.2612136730710692, "grad_norm": 2.323775053024292, "learning_rate": 0.00017324843363748148, "loss": 1.521, "step": 7294 }, { "epoch": 0.26124948520063745, "grad_norm": 1.5547338724136353, "learning_rate": 0.00017324053672368862, "loss": 1.5629, "step": 7295 }, { "epoch": 0.2612852973302057, "grad_norm": 1.9380877017974854, "learning_rate": 0.0001732326388245388, "loss": 1.5277, "step": 7296 }, { "epoch": 0.26132110945977405, "grad_norm": 2.077476739883423, "learning_rate": 0.00017322473994013833, "loss": 1.4113, "step": 7297 }, { "epoch": 0.2613569215893423, "grad_norm": 1.7269058227539062, "learning_rate": 0.00017321684007059343, "loss": 1.4078, "step": 7298 }, { "epoch": 0.2613927337189106, "grad_norm": 1.5610759258270264, "learning_rate": 0.00017320893921601036, "loss": 1.348, "step": 7299 }, { "epoch": 0.26142854584847885, "grad_norm": 1.4764721393585205, "learning_rate": 0.00017320103737649548, "loss": 1.3946, "step": 7300 }, { "epoch": 0.2614643579780472, "grad_norm": 1.443997859954834, "learning_rate": 0.00017319313455215504, "loss": 1.4114, "step": 7301 }, { "epoch": 0.26150017010761545, "grad_norm": 1.4743808507919312, "learning_rate": 0.00017318523074309538, "loss": 1.7602, "step": 7302 }, { "epoch": 0.2615359822371837, "grad_norm": 1.5797048807144165, "learning_rate": 0.00017317732594942286, "loss": 1.5341, "step": 7303 }, { "epoch": 0.26157179436675204, "grad_norm": 2.011399745941162, "learning_rate": 0.0001731694201712438, "loss": 1.5281, "step": 7304 }, { "epoch": 0.2616076064963203, "grad_norm": 3.038586378097534, "learning_rate": 0.0001731615134086646, "loss": 1.4805, "step": 7305 }, { "epoch": 0.2616434186258886, "grad_norm": 1.6645811796188354, "learning_rate": 0.00017315360566179158, "loss": 1.4753, "step": 7306 }, { "epoch": 0.26167923075545685, "grad_norm": 1.668992519378662, "learning_rate": 0.00017314569693073115, "loss": 1.545, "step": 7307 }, { "epoch": 0.2617150428850252, "grad_norm": 1.6997525691986084, "learning_rate": 0.00017313778721558975, "loss": 1.5749, "step": 7308 }, { "epoch": 0.26175085501459344, "grad_norm": 1.5419024229049683, "learning_rate": 0.00017312987651647374, "loss": 1.457, "step": 7309 }, { "epoch": 0.2617866671441617, "grad_norm": 1.712589144706726, "learning_rate": 0.0001731219648334896, "loss": 2.0299, "step": 7310 }, { "epoch": 0.26182247927373004, "grad_norm": 1.5588375329971313, "learning_rate": 0.00017311405216674373, "loss": 1.5899, "step": 7311 }, { "epoch": 0.2618582914032983, "grad_norm": 1.906366229057312, "learning_rate": 0.00017310613851634257, "loss": 1.698, "step": 7312 }, { "epoch": 0.2618941035328666, "grad_norm": 1.9092109203338623, "learning_rate": 0.00017309822388239266, "loss": 1.6362, "step": 7313 }, { "epoch": 0.26192991566243484, "grad_norm": 1.797682285308838, "learning_rate": 0.0001730903082650004, "loss": 1.3929, "step": 7314 }, { "epoch": 0.26196572779200317, "grad_norm": 2.1721010208129883, "learning_rate": 0.00017308239166427232, "loss": 1.9409, "step": 7315 }, { "epoch": 0.26200153992157144, "grad_norm": 2.2614290714263916, "learning_rate": 0.00017307447408031497, "loss": 1.5107, "step": 7316 }, { "epoch": 0.2620373520511397, "grad_norm": 1.8231735229492188, "learning_rate": 0.0001730665555132348, "loss": 1.4254, "step": 7317 }, { "epoch": 0.26207316418070803, "grad_norm": 1.7644098997116089, "learning_rate": 0.00017305863596313837, "loss": 1.4696, "step": 7318 }, { "epoch": 0.2621089763102763, "grad_norm": 1.7434821128845215, "learning_rate": 0.00017305071543013227, "loss": 1.522, "step": 7319 }, { "epoch": 0.26214478843984457, "grad_norm": 1.7708624601364136, "learning_rate": 0.000173042793914323, "loss": 1.4897, "step": 7320 }, { "epoch": 0.26218060056941284, "grad_norm": 1.6023200750350952, "learning_rate": 0.00017303487141581716, "loss": 1.58, "step": 7321 }, { "epoch": 0.26221641269898116, "grad_norm": 2.101994752883911, "learning_rate": 0.0001730269479347213, "loss": 1.7245, "step": 7322 }, { "epoch": 0.26225222482854943, "grad_norm": 1.9376888275146484, "learning_rate": 0.00017301902347114208, "loss": 1.5611, "step": 7323 }, { "epoch": 0.2622880369581177, "grad_norm": 2.2252988815307617, "learning_rate": 0.0001730110980251861, "loss": 1.7032, "step": 7324 }, { "epoch": 0.262323849087686, "grad_norm": 1.6127136945724487, "learning_rate": 0.00017300317159695995, "loss": 1.4226, "step": 7325 }, { "epoch": 0.2623596612172543, "grad_norm": 2.258042812347412, "learning_rate": 0.0001729952441865703, "loss": 1.6989, "step": 7326 }, { "epoch": 0.26239547334682256, "grad_norm": 1.4718269109725952, "learning_rate": 0.0001729873157941238, "loss": 1.5979, "step": 7327 }, { "epoch": 0.26243128547639083, "grad_norm": 1.5999681949615479, "learning_rate": 0.00017297938641972716, "loss": 1.4861, "step": 7328 }, { "epoch": 0.26246709760595915, "grad_norm": 1.6622743606567383, "learning_rate": 0.00017297145606348695, "loss": 1.4849, "step": 7329 }, { "epoch": 0.2625029097355274, "grad_norm": 2.1193180084228516, "learning_rate": 0.00017296352472550994, "loss": 1.8168, "step": 7330 }, { "epoch": 0.2625387218650957, "grad_norm": 1.6978873014450073, "learning_rate": 0.00017295559240590282, "loss": 1.9455, "step": 7331 }, { "epoch": 0.262574533994664, "grad_norm": 1.5294241905212402, "learning_rate": 0.00017294765910477234, "loss": 1.5882, "step": 7332 }, { "epoch": 0.2626103461242323, "grad_norm": 1.2453467845916748, "learning_rate": 0.00017293972482222515, "loss": 1.4419, "step": 7333 }, { "epoch": 0.26264615825380055, "grad_norm": 2.2070870399475098, "learning_rate": 0.00017293178955836807, "loss": 1.7747, "step": 7334 }, { "epoch": 0.2626819703833688, "grad_norm": 1.5115678310394287, "learning_rate": 0.00017292385331330786, "loss": 1.6529, "step": 7335 }, { "epoch": 0.26271778251293715, "grad_norm": 1.4143116474151611, "learning_rate": 0.00017291591608715123, "loss": 1.5566, "step": 7336 }, { "epoch": 0.2627535946425054, "grad_norm": 1.4451357126235962, "learning_rate": 0.00017290797788000503, "loss": 1.6816, "step": 7337 }, { "epoch": 0.2627894067720737, "grad_norm": 1.6054461002349854, "learning_rate": 0.00017290003869197603, "loss": 1.7646, "step": 7338 }, { "epoch": 0.262825218901642, "grad_norm": 1.4832358360290527, "learning_rate": 0.00017289209852317102, "loss": 1.2788, "step": 7339 }, { "epoch": 0.2628610310312103, "grad_norm": 1.7397518157958984, "learning_rate": 0.00017288415737369689, "loss": 1.5923, "step": 7340 }, { "epoch": 0.26289684316077855, "grad_norm": 2.040109157562256, "learning_rate": 0.0001728762152436604, "loss": 1.5015, "step": 7341 }, { "epoch": 0.2629326552903468, "grad_norm": 1.6289827823638916, "learning_rate": 0.00017286827213316844, "loss": 1.283, "step": 7342 }, { "epoch": 0.26296846741991514, "grad_norm": 1.4795184135437012, "learning_rate": 0.0001728603280423279, "loss": 1.6067, "step": 7343 }, { "epoch": 0.2630042795494834, "grad_norm": 1.4530529975891113, "learning_rate": 0.00017285238297124562, "loss": 1.3256, "step": 7344 }, { "epoch": 0.2630400916790517, "grad_norm": 2.1068873405456543, "learning_rate": 0.00017284443692002846, "loss": 1.8416, "step": 7345 }, { "epoch": 0.26307590380862, "grad_norm": 1.2771168947219849, "learning_rate": 0.00017283648988878343, "loss": 1.5879, "step": 7346 }, { "epoch": 0.2631117159381883, "grad_norm": 1.4236646890640259, "learning_rate": 0.00017282854187761735, "loss": 1.8003, "step": 7347 }, { "epoch": 0.26314752806775654, "grad_norm": 1.7822551727294922, "learning_rate": 0.00017282059288663715, "loss": 1.5744, "step": 7348 }, { "epoch": 0.2631833401973248, "grad_norm": 1.6378618478775024, "learning_rate": 0.00017281264291594983, "loss": 1.69, "step": 7349 }, { "epoch": 0.26321915232689314, "grad_norm": 1.6540489196777344, "learning_rate": 0.00017280469196566235, "loss": 1.3977, "step": 7350 }, { "epoch": 0.2632549644564614, "grad_norm": 1.8168818950653076, "learning_rate": 0.0001727967400358816, "loss": 1.5793, "step": 7351 }, { "epoch": 0.2632907765860297, "grad_norm": 2.310772657394409, "learning_rate": 0.00017278878712671464, "loss": 1.8026, "step": 7352 }, { "epoch": 0.263326588715598, "grad_norm": 1.4462745189666748, "learning_rate": 0.00017278083323826846, "loss": 1.6332, "step": 7353 }, { "epoch": 0.26336240084516627, "grad_norm": 1.3352108001708984, "learning_rate": 0.00017277287837065002, "loss": 1.2135, "step": 7354 }, { "epoch": 0.26339821297473454, "grad_norm": 2.1556811332702637, "learning_rate": 0.0001727649225239664, "loss": 1.4066, "step": 7355 }, { "epoch": 0.2634340251043028, "grad_norm": 1.7638002634048462, "learning_rate": 0.00017275696569832457, "loss": 1.623, "step": 7356 }, { "epoch": 0.26346983723387113, "grad_norm": 1.8131543397903442, "learning_rate": 0.00017274900789383165, "loss": 1.4694, "step": 7357 }, { "epoch": 0.2635056493634394, "grad_norm": 1.7827945947647095, "learning_rate": 0.0001727410491105946, "loss": 1.3291, "step": 7358 }, { "epoch": 0.26354146149300767, "grad_norm": 2.088897943496704, "learning_rate": 0.00017273308934872064, "loss": 1.4516, "step": 7359 }, { "epoch": 0.263577273622576, "grad_norm": 1.7818443775177002, "learning_rate": 0.00017272512860831674, "loss": 1.8525, "step": 7360 }, { "epoch": 0.26361308575214426, "grad_norm": 1.5051300525665283, "learning_rate": 0.00017271716688949007, "loss": 1.5285, "step": 7361 }, { "epoch": 0.26364889788171253, "grad_norm": 2.0163896083831787, "learning_rate": 0.0001727092041923477, "loss": 1.5589, "step": 7362 }, { "epoch": 0.2636847100112808, "grad_norm": 1.6455193758010864, "learning_rate": 0.00017270124051699682, "loss": 1.673, "step": 7363 }, { "epoch": 0.2637205221408491, "grad_norm": 1.561238408088684, "learning_rate": 0.00017269327586354446, "loss": 1.6594, "step": 7364 }, { "epoch": 0.2637563342704174, "grad_norm": 1.765101671218872, "learning_rate": 0.00017268531023209788, "loss": 1.5668, "step": 7365 }, { "epoch": 0.26379214639998566, "grad_norm": 2.3557276725769043, "learning_rate": 0.0001726773436227642, "loss": 1.5615, "step": 7366 }, { "epoch": 0.263827958529554, "grad_norm": 1.725612759590149, "learning_rate": 0.0001726693760356506, "loss": 1.5138, "step": 7367 }, { "epoch": 0.26386377065912225, "grad_norm": 1.6361420154571533, "learning_rate": 0.0001726614074708643, "loss": 1.3698, "step": 7368 }, { "epoch": 0.2638995827886905, "grad_norm": 1.775638461112976, "learning_rate": 0.00017265343792851248, "loss": 1.3577, "step": 7369 }, { "epoch": 0.2639353949182588, "grad_norm": 1.387298345565796, "learning_rate": 0.00017264546740870234, "loss": 1.5771, "step": 7370 }, { "epoch": 0.2639712070478271, "grad_norm": 1.341925859451294, "learning_rate": 0.0001726374959115412, "loss": 1.3746, "step": 7371 }, { "epoch": 0.2640070191773954, "grad_norm": 1.5934443473815918, "learning_rate": 0.0001726295234371362, "loss": 1.5295, "step": 7372 }, { "epoch": 0.26404283130696365, "grad_norm": 1.5405175685882568, "learning_rate": 0.00017262154998559466, "loss": 1.5476, "step": 7373 }, { "epoch": 0.264078643436532, "grad_norm": 1.6285407543182373, "learning_rate": 0.00017261357555702387, "loss": 1.565, "step": 7374 }, { "epoch": 0.26411445556610025, "grad_norm": 2.251279592514038, "learning_rate": 0.00017260560015153106, "loss": 1.7117, "step": 7375 }, { "epoch": 0.2641502676956685, "grad_norm": 2.265822410583496, "learning_rate": 0.00017259762376922356, "loss": 2.0113, "step": 7376 }, { "epoch": 0.2641860798252368, "grad_norm": 2.2183547019958496, "learning_rate": 0.00017258964641020868, "loss": 1.5966, "step": 7377 }, { "epoch": 0.2642218919548051, "grad_norm": 2.0988476276397705, "learning_rate": 0.0001725816680745937, "loss": 1.3639, "step": 7378 }, { "epoch": 0.2642577040843734, "grad_norm": 2.5295467376708984, "learning_rate": 0.00017257368876248604, "loss": 1.4807, "step": 7379 }, { "epoch": 0.26429351621394165, "grad_norm": 1.4437577724456787, "learning_rate": 0.000172565708473993, "loss": 1.4974, "step": 7380 }, { "epoch": 0.26432932834351, "grad_norm": 1.2854939699172974, "learning_rate": 0.00017255772720922195, "loss": 1.6244, "step": 7381 }, { "epoch": 0.26436514047307824, "grad_norm": 1.3018090724945068, "learning_rate": 0.0001725497449682803, "loss": 1.3882, "step": 7382 }, { "epoch": 0.2644009526026465, "grad_norm": 1.5124176740646362, "learning_rate": 0.00017254176175127538, "loss": 1.4241, "step": 7383 }, { "epoch": 0.2644367647322148, "grad_norm": 2.2638661861419678, "learning_rate": 0.00017253377755831466, "loss": 1.7341, "step": 7384 }, { "epoch": 0.2644725768617831, "grad_norm": 1.42527174949646, "learning_rate": 0.00017252579238950552, "loss": 1.5593, "step": 7385 }, { "epoch": 0.2645083889913514, "grad_norm": 1.981040596961975, "learning_rate": 0.00017251780624495536, "loss": 1.482, "step": 7386 }, { "epoch": 0.26454420112091964, "grad_norm": 2.1076390743255615, "learning_rate": 0.0001725098191247717, "loss": 1.5527, "step": 7387 }, { "epoch": 0.26458001325048797, "grad_norm": 1.287087321281433, "learning_rate": 0.00017250183102906195, "loss": 1.7802, "step": 7388 }, { "epoch": 0.26461582538005624, "grad_norm": 1.3365637063980103, "learning_rate": 0.00017249384195793357, "loss": 1.4736, "step": 7389 }, { "epoch": 0.2646516375096245, "grad_norm": 1.6480307579040527, "learning_rate": 0.000172485851911494, "loss": 1.8273, "step": 7390 }, { "epoch": 0.2646874496391928, "grad_norm": 2.1490418910980225, "learning_rate": 0.00017247786088985087, "loss": 1.751, "step": 7391 }, { "epoch": 0.2647232617687611, "grad_norm": 1.840654730796814, "learning_rate": 0.0001724698688931116, "loss": 1.772, "step": 7392 }, { "epoch": 0.26475907389832937, "grad_norm": 3.2278695106506348, "learning_rate": 0.0001724618759213837, "loss": 1.7355, "step": 7393 }, { "epoch": 0.26479488602789764, "grad_norm": 2.007066249847412, "learning_rate": 0.00017245388197477477, "loss": 1.4662, "step": 7394 }, { "epoch": 0.26483069815746596, "grad_norm": 1.8525476455688477, "learning_rate": 0.0001724458870533923, "loss": 1.6683, "step": 7395 }, { "epoch": 0.26486651028703423, "grad_norm": 1.865721583366394, "learning_rate": 0.00017243789115734383, "loss": 1.7475, "step": 7396 }, { "epoch": 0.2649023224166025, "grad_norm": 1.5090208053588867, "learning_rate": 0.00017242989428673701, "loss": 1.5835, "step": 7397 }, { "epoch": 0.26493813454617077, "grad_norm": 2.2835018634796143, "learning_rate": 0.0001724218964416794, "loss": 1.4297, "step": 7398 }, { "epoch": 0.2649739466757391, "grad_norm": 1.7916940450668335, "learning_rate": 0.00017241389762227857, "loss": 1.666, "step": 7399 }, { "epoch": 0.26500975880530736, "grad_norm": 1.830520510673523, "learning_rate": 0.00017240589782864215, "loss": 1.5415, "step": 7400 }, { "epoch": 0.26504557093487563, "grad_norm": 1.5442086458206177, "learning_rate": 0.00017239789706087778, "loss": 1.6078, "step": 7401 }, { "epoch": 0.26508138306444395, "grad_norm": 1.3873393535614014, "learning_rate": 0.0001723898953190931, "loss": 1.6903, "step": 7402 }, { "epoch": 0.2651171951940122, "grad_norm": 1.6361714601516724, "learning_rate": 0.00017238189260339573, "loss": 1.6274, "step": 7403 }, { "epoch": 0.2651530073235805, "grad_norm": 1.1806063652038574, "learning_rate": 0.00017237388891389336, "loss": 1.5139, "step": 7404 }, { "epoch": 0.26518881945314876, "grad_norm": 1.8152517080307007, "learning_rate": 0.0001723658842506937, "loss": 1.3687, "step": 7405 }, { "epoch": 0.2652246315827171, "grad_norm": 1.5166256427764893, "learning_rate": 0.0001723578786139044, "loss": 1.3072, "step": 7406 }, { "epoch": 0.26526044371228535, "grad_norm": 1.715145468711853, "learning_rate": 0.00017234987200363317, "loss": 1.4742, "step": 7407 }, { "epoch": 0.2652962558418536, "grad_norm": 2.2993531227111816, "learning_rate": 0.00017234186441998777, "loss": 1.5068, "step": 7408 }, { "epoch": 0.26533206797142195, "grad_norm": 1.8281476497650146, "learning_rate": 0.00017233385586307588, "loss": 1.6021, "step": 7409 }, { "epoch": 0.2653678801009902, "grad_norm": 1.5883283615112305, "learning_rate": 0.00017232584633300522, "loss": 1.6, "step": 7410 }, { "epoch": 0.2654036922305585, "grad_norm": 1.610416293144226, "learning_rate": 0.00017231783582988367, "loss": 1.6896, "step": 7411 }, { "epoch": 0.26543950436012675, "grad_norm": 1.5972161293029785, "learning_rate": 0.00017230982435381887, "loss": 1.7697, "step": 7412 }, { "epoch": 0.2654753164896951, "grad_norm": 1.538904070854187, "learning_rate": 0.00017230181190491862, "loss": 1.5154, "step": 7413 }, { "epoch": 0.26551112861926335, "grad_norm": 1.459111213684082, "learning_rate": 0.0001722937984832908, "loss": 1.6878, "step": 7414 }, { "epoch": 0.2655469407488316, "grad_norm": 1.890040636062622, "learning_rate": 0.0001722857840890432, "loss": 1.3518, "step": 7415 }, { "epoch": 0.26558275287839994, "grad_norm": 1.6970199346542358, "learning_rate": 0.00017227776872228359, "loss": 1.5614, "step": 7416 }, { "epoch": 0.2656185650079682, "grad_norm": 1.4791874885559082, "learning_rate": 0.00017226975238311982, "loss": 1.4532, "step": 7417 }, { "epoch": 0.2656543771375365, "grad_norm": 1.8702563047409058, "learning_rate": 0.00017226173507165976, "loss": 1.7689, "step": 7418 }, { "epoch": 0.26569018926710475, "grad_norm": 1.5942208766937256, "learning_rate": 0.0001722537167880113, "loss": 1.7556, "step": 7419 }, { "epoch": 0.2657260013966731, "grad_norm": 1.6000324487686157, "learning_rate": 0.00017224569753228225, "loss": 1.5437, "step": 7420 }, { "epoch": 0.26576181352624134, "grad_norm": 2.1063296794891357, "learning_rate": 0.00017223767730458053, "loss": 1.7174, "step": 7421 }, { "epoch": 0.2657976256558096, "grad_norm": 1.3740330934524536, "learning_rate": 0.00017222965610501405, "loss": 1.4734, "step": 7422 }, { "epoch": 0.26583343778537794, "grad_norm": 1.5399854183197021, "learning_rate": 0.00017222163393369071, "loss": 1.5247, "step": 7423 }, { "epoch": 0.2658692499149462, "grad_norm": 1.5186257362365723, "learning_rate": 0.00017221361079071846, "loss": 1.901, "step": 7424 }, { "epoch": 0.2659050620445145, "grad_norm": 1.7701181173324585, "learning_rate": 0.00017220558667620518, "loss": 1.4393, "step": 7425 }, { "epoch": 0.26594087417408274, "grad_norm": 1.7692437171936035, "learning_rate": 0.0001721975615902589, "loss": 1.5675, "step": 7426 }, { "epoch": 0.26597668630365107, "grad_norm": 2.3157219886779785, "learning_rate": 0.00017218953553298759, "loss": 1.5851, "step": 7427 }, { "epoch": 0.26601249843321934, "grad_norm": 1.5588358640670776, "learning_rate": 0.00017218150850449915, "loss": 1.2724, "step": 7428 }, { "epoch": 0.2660483105627876, "grad_norm": 1.2565510272979736, "learning_rate": 0.00017217348050490162, "loss": 1.6188, "step": 7429 }, { "epoch": 0.2660841226923559, "grad_norm": 1.3198089599609375, "learning_rate": 0.00017216545153430303, "loss": 1.4822, "step": 7430 }, { "epoch": 0.2661199348219242, "grad_norm": 1.2001327276229858, "learning_rate": 0.00017215742159281137, "loss": 1.4967, "step": 7431 }, { "epoch": 0.26615574695149247, "grad_norm": 1.8182188272476196, "learning_rate": 0.00017214939068053468, "loss": 1.6206, "step": 7432 }, { "epoch": 0.26619155908106074, "grad_norm": 1.916847586631775, "learning_rate": 0.000172141358797581, "loss": 1.6809, "step": 7433 }, { "epoch": 0.26622737121062906, "grad_norm": 1.1985223293304443, "learning_rate": 0.0001721333259440584, "loss": 1.4388, "step": 7434 }, { "epoch": 0.26626318334019733, "grad_norm": 1.6115180253982544, "learning_rate": 0.00017212529212007492, "loss": 1.6658, "step": 7435 }, { "epoch": 0.2662989954697656, "grad_norm": 1.7065064907073975, "learning_rate": 0.0001721172573257387, "loss": 1.5012, "step": 7436 }, { "epoch": 0.26633480759933387, "grad_norm": 1.26376211643219, "learning_rate": 0.0001721092215611578, "loss": 1.0781, "step": 7437 }, { "epoch": 0.2663706197289022, "grad_norm": 1.3036880493164062, "learning_rate": 0.00017210118482644036, "loss": 1.5433, "step": 7438 }, { "epoch": 0.26640643185847046, "grad_norm": 1.7557674646377563, "learning_rate": 0.00017209314712169445, "loss": 1.449, "step": 7439 }, { "epoch": 0.26644224398803873, "grad_norm": 1.509422779083252, "learning_rate": 0.00017208510844702823, "loss": 1.6469, "step": 7440 }, { "epoch": 0.26647805611760705, "grad_norm": 1.7672717571258545, "learning_rate": 0.00017207706880254987, "loss": 1.6361, "step": 7441 }, { "epoch": 0.2665138682471753, "grad_norm": 1.363512396812439, "learning_rate": 0.00017206902818836756, "loss": 1.2467, "step": 7442 }, { "epoch": 0.2665496803767436, "grad_norm": 2.0854268074035645, "learning_rate": 0.00017206098660458937, "loss": 1.596, "step": 7443 }, { "epoch": 0.26658549250631186, "grad_norm": 1.7014752626419067, "learning_rate": 0.00017205294405132362, "loss": 1.4051, "step": 7444 }, { "epoch": 0.2666213046358802, "grad_norm": 1.4514315128326416, "learning_rate": 0.00017204490052867842, "loss": 1.3514, "step": 7445 }, { "epoch": 0.26665711676544845, "grad_norm": 1.4428623914718628, "learning_rate": 0.00017203685603676202, "loss": 1.6256, "step": 7446 }, { "epoch": 0.2666929288950167, "grad_norm": 2.695467233657837, "learning_rate": 0.0001720288105756826, "loss": 1.8272, "step": 7447 }, { "epoch": 0.26672874102458505, "grad_norm": 1.776180386543274, "learning_rate": 0.0001720207641455485, "loss": 1.6105, "step": 7448 }, { "epoch": 0.2667645531541533, "grad_norm": 1.5230605602264404, "learning_rate": 0.0001720127167464679, "loss": 1.6002, "step": 7449 }, { "epoch": 0.2668003652837216, "grad_norm": 1.4507273435592651, "learning_rate": 0.00017200466837854908, "loss": 1.6159, "step": 7450 }, { "epoch": 0.26683617741328985, "grad_norm": 2.1067185401916504, "learning_rate": 0.00017199661904190037, "loss": 1.3856, "step": 7451 }, { "epoch": 0.2668719895428582, "grad_norm": 1.6292303800582886, "learning_rate": 0.00017198856873662996, "loss": 1.5911, "step": 7452 }, { "epoch": 0.26690780167242645, "grad_norm": 1.3270217180252075, "learning_rate": 0.00017198051746284624, "loss": 1.4468, "step": 7453 }, { "epoch": 0.2669436138019947, "grad_norm": 1.3355484008789062, "learning_rate": 0.00017197246522065752, "loss": 1.4923, "step": 7454 }, { "epoch": 0.26697942593156304, "grad_norm": 1.5357935428619385, "learning_rate": 0.00017196441201017208, "loss": 1.2817, "step": 7455 }, { "epoch": 0.2670152380611313, "grad_norm": 1.796885371208191, "learning_rate": 0.00017195635783149834, "loss": 1.7462, "step": 7456 }, { "epoch": 0.2670510501906996, "grad_norm": 2.4231655597686768, "learning_rate": 0.0001719483026847446, "loss": 1.9039, "step": 7457 }, { "epoch": 0.26708686232026785, "grad_norm": 1.710451602935791, "learning_rate": 0.00017194024657001927, "loss": 1.5666, "step": 7458 }, { "epoch": 0.2671226744498362, "grad_norm": 1.5234856605529785, "learning_rate": 0.0001719321894874307, "loss": 1.6446, "step": 7459 }, { "epoch": 0.26715848657940444, "grad_norm": 1.9876848459243774, "learning_rate": 0.00017192413143708735, "loss": 1.6329, "step": 7460 }, { "epoch": 0.2671942987089727, "grad_norm": 1.7578610181808472, "learning_rate": 0.00017191607241909753, "loss": 1.5056, "step": 7461 }, { "epoch": 0.26723011083854104, "grad_norm": 1.4033610820770264, "learning_rate": 0.00017190801243356977, "loss": 1.4246, "step": 7462 }, { "epoch": 0.2672659229681093, "grad_norm": 1.780724048614502, "learning_rate": 0.0001718999514806124, "loss": 1.3211, "step": 7463 }, { "epoch": 0.2673017350976776, "grad_norm": 2.1150660514831543, "learning_rate": 0.000171891889560334, "loss": 1.5886, "step": 7464 }, { "epoch": 0.26733754722724584, "grad_norm": 1.8331183195114136, "learning_rate": 0.0001718838266728429, "loss": 1.4792, "step": 7465 }, { "epoch": 0.26737335935681417, "grad_norm": 1.8556559085845947, "learning_rate": 0.00017187576281824766, "loss": 1.5132, "step": 7466 }, { "epoch": 0.26740917148638244, "grad_norm": 1.7563835382461548, "learning_rate": 0.00017186769799665673, "loss": 1.5768, "step": 7467 }, { "epoch": 0.2674449836159507, "grad_norm": 2.372431516647339, "learning_rate": 0.00017185963220817864, "loss": 1.5395, "step": 7468 }, { "epoch": 0.26748079574551903, "grad_norm": 1.086047649383545, "learning_rate": 0.0001718515654529219, "loss": 1.3656, "step": 7469 }, { "epoch": 0.2675166078750873, "grad_norm": 1.8703947067260742, "learning_rate": 0.000171843497730995, "loss": 1.6605, "step": 7470 }, { "epoch": 0.26755242000465557, "grad_norm": 1.627095103263855, "learning_rate": 0.00017183542904250656, "loss": 1.6894, "step": 7471 }, { "epoch": 0.26758823213422384, "grad_norm": 1.8694849014282227, "learning_rate": 0.00017182735938756506, "loss": 1.4633, "step": 7472 }, { "epoch": 0.26762404426379216, "grad_norm": 1.7789463996887207, "learning_rate": 0.00017181928876627907, "loss": 1.5519, "step": 7473 }, { "epoch": 0.26765985639336043, "grad_norm": 2.8911938667297363, "learning_rate": 0.0001718112171787572, "loss": 1.5961, "step": 7474 }, { "epoch": 0.2676956685229287, "grad_norm": 1.5189895629882812, "learning_rate": 0.000171803144625108, "loss": 1.401, "step": 7475 }, { "epoch": 0.267731480652497, "grad_norm": 1.9463037252426147, "learning_rate": 0.00017179507110544014, "loss": 1.8663, "step": 7476 }, { "epoch": 0.2677672927820653, "grad_norm": 1.4575629234313965, "learning_rate": 0.0001717869966198622, "loss": 1.5934, "step": 7477 }, { "epoch": 0.26780310491163356, "grad_norm": 1.911528468132019, "learning_rate": 0.00017177892116848284, "loss": 1.572, "step": 7478 }, { "epoch": 0.26783891704120183, "grad_norm": 1.543802261352539, "learning_rate": 0.00017177084475141069, "loss": 1.7237, "step": 7479 }, { "epoch": 0.26787472917077015, "grad_norm": 1.4773614406585693, "learning_rate": 0.0001717627673687544, "loss": 1.6445, "step": 7480 }, { "epoch": 0.2679105413003384, "grad_norm": 1.8544567823410034, "learning_rate": 0.0001717546890206226, "loss": 1.2569, "step": 7481 }, { "epoch": 0.2679463534299067, "grad_norm": 1.9463386535644531, "learning_rate": 0.00017174660970712403, "loss": 1.5379, "step": 7482 }, { "epoch": 0.267982165559475, "grad_norm": 2.0309228897094727, "learning_rate": 0.00017173852942836739, "loss": 1.6161, "step": 7483 }, { "epoch": 0.2680179776890433, "grad_norm": 1.7174644470214844, "learning_rate": 0.00017173044818446137, "loss": 1.6121, "step": 7484 }, { "epoch": 0.26805378981861155, "grad_norm": 1.8857377767562866, "learning_rate": 0.00017172236597551467, "loss": 1.6238, "step": 7485 }, { "epoch": 0.2680896019481798, "grad_norm": 1.3329565525054932, "learning_rate": 0.0001717142828016361, "loss": 1.7221, "step": 7486 }, { "epoch": 0.26812541407774815, "grad_norm": 1.682708978652954, "learning_rate": 0.00017170619866293434, "loss": 1.1758, "step": 7487 }, { "epoch": 0.2681612262073164, "grad_norm": 1.6127010583877563, "learning_rate": 0.00017169811355951815, "loss": 1.5131, "step": 7488 }, { "epoch": 0.2681970383368847, "grad_norm": 1.7014356851577759, "learning_rate": 0.0001716900274914963, "loss": 1.3939, "step": 7489 }, { "epoch": 0.268232850466453, "grad_norm": 1.85292387008667, "learning_rate": 0.00017168194045897767, "loss": 1.434, "step": 7490 }, { "epoch": 0.2682686625960213, "grad_norm": 1.6793440580368042, "learning_rate": 0.000171673852462071, "loss": 1.3301, "step": 7491 }, { "epoch": 0.26830447472558955, "grad_norm": 1.561973214149475, "learning_rate": 0.00017166576350088506, "loss": 1.2769, "step": 7492 }, { "epoch": 0.2683402868551578, "grad_norm": 1.6289607286453247, "learning_rate": 0.0001716576735755287, "loss": 1.4536, "step": 7493 }, { "epoch": 0.26837609898472614, "grad_norm": 1.7525798082351685, "learning_rate": 0.00017164958268611077, "loss": 1.3704, "step": 7494 }, { "epoch": 0.2684119111142944, "grad_norm": 1.8854219913482666, "learning_rate": 0.00017164149083274017, "loss": 1.6748, "step": 7495 }, { "epoch": 0.2684477232438627, "grad_norm": 1.3732788562774658, "learning_rate": 0.0001716333980155257, "loss": 1.5534, "step": 7496 }, { "epoch": 0.268483535373431, "grad_norm": 1.6843669414520264, "learning_rate": 0.00017162530423457626, "loss": 1.7485, "step": 7497 }, { "epoch": 0.2685193475029993, "grad_norm": 1.746842861175537, "learning_rate": 0.00017161720949000075, "loss": 1.7009, "step": 7498 }, { "epoch": 0.26855515963256754, "grad_norm": 2.6704607009887695, "learning_rate": 0.00017160911378190808, "loss": 1.4903, "step": 7499 }, { "epoch": 0.2685909717621358, "grad_norm": 1.9854313135147095, "learning_rate": 0.00017160101711040713, "loss": 1.7229, "step": 7500 }, { "epoch": 0.26862678389170414, "grad_norm": 2.014387845993042, "learning_rate": 0.00017159291947560682, "loss": 1.5854, "step": 7501 }, { "epoch": 0.2686625960212724, "grad_norm": 1.4004807472229004, "learning_rate": 0.00017158482087761617, "loss": 1.5235, "step": 7502 }, { "epoch": 0.2686984081508407, "grad_norm": 1.545235276222229, "learning_rate": 0.0001715767213165441, "loss": 1.6951, "step": 7503 }, { "epoch": 0.268734220280409, "grad_norm": 1.4803507328033447, "learning_rate": 0.00017156862079249953, "loss": 1.7183, "step": 7504 }, { "epoch": 0.26877003240997727, "grad_norm": 1.6288284063339233, "learning_rate": 0.00017156051930559155, "loss": 1.6609, "step": 7505 }, { "epoch": 0.26880584453954554, "grad_norm": 1.784579873085022, "learning_rate": 0.00017155241685592903, "loss": 1.6495, "step": 7506 }, { "epoch": 0.2688416566691138, "grad_norm": 1.2053595781326294, "learning_rate": 0.00017154431344362106, "loss": 1.4678, "step": 7507 }, { "epoch": 0.26887746879868213, "grad_norm": 1.9320080280303955, "learning_rate": 0.00017153620906877666, "loss": 1.6605, "step": 7508 }, { "epoch": 0.2689132809282504, "grad_norm": 1.4452803134918213, "learning_rate": 0.00017152810373150478, "loss": 1.6794, "step": 7509 }, { "epoch": 0.26894909305781867, "grad_norm": 1.9542020559310913, "learning_rate": 0.00017151999743191456, "loss": 1.4616, "step": 7510 }, { "epoch": 0.268984905187387, "grad_norm": 2.002528667449951, "learning_rate": 0.00017151189017011503, "loss": 1.7598, "step": 7511 }, { "epoch": 0.26902071731695526, "grad_norm": 1.4809521436691284, "learning_rate": 0.00017150378194621529, "loss": 1.5741, "step": 7512 }, { "epoch": 0.26905652944652353, "grad_norm": 2.5902462005615234, "learning_rate": 0.0001714956727603244, "loss": 1.5187, "step": 7513 }, { "epoch": 0.2690923415760918, "grad_norm": 1.7048165798187256, "learning_rate": 0.0001714875626125514, "loss": 1.5129, "step": 7514 }, { "epoch": 0.2691281537056601, "grad_norm": 1.714754343032837, "learning_rate": 0.0001714794515030055, "loss": 1.9192, "step": 7515 }, { "epoch": 0.2691639658352284, "grad_norm": 1.5746515989303589, "learning_rate": 0.00017147133943179577, "loss": 1.5068, "step": 7516 }, { "epoch": 0.26919977796479666, "grad_norm": 1.5344665050506592, "learning_rate": 0.00017146322639903137, "loss": 1.6429, "step": 7517 }, { "epoch": 0.269235590094365, "grad_norm": 1.722382664680481, "learning_rate": 0.00017145511240482142, "loss": 1.3101, "step": 7518 }, { "epoch": 0.26927140222393325, "grad_norm": 1.7240898609161377, "learning_rate": 0.00017144699744927507, "loss": 1.657, "step": 7519 }, { "epoch": 0.2693072143535015, "grad_norm": 2.000774383544922, "learning_rate": 0.0001714388815325016, "loss": 1.4911, "step": 7520 }, { "epoch": 0.2693430264830698, "grad_norm": 2.6266844272613525, "learning_rate": 0.0001714307646546101, "loss": 1.7797, "step": 7521 }, { "epoch": 0.2693788386126381, "grad_norm": 1.6065651178359985, "learning_rate": 0.00017142264681570978, "loss": 1.6816, "step": 7522 }, { "epoch": 0.2694146507422064, "grad_norm": 1.504185438156128, "learning_rate": 0.00017141452801590988, "loss": 1.2781, "step": 7523 }, { "epoch": 0.26945046287177465, "grad_norm": 1.5775504112243652, "learning_rate": 0.00017140640825531967, "loss": 1.6476, "step": 7524 }, { "epoch": 0.269486275001343, "grad_norm": 1.482826828956604, "learning_rate": 0.0001713982875340483, "loss": 1.7394, "step": 7525 }, { "epoch": 0.26952208713091125, "grad_norm": 2.138429641723633, "learning_rate": 0.00017139016585220512, "loss": 1.6288, "step": 7526 }, { "epoch": 0.2695578992604795, "grad_norm": 1.5291169881820679, "learning_rate": 0.0001713820432098993, "loss": 1.3784, "step": 7527 }, { "epoch": 0.2695937113900478, "grad_norm": 1.7900766134262085, "learning_rate": 0.00017137391960724013, "loss": 1.4586, "step": 7528 }, { "epoch": 0.2696295235196161, "grad_norm": 1.9882597923278809, "learning_rate": 0.000171365795044337, "loss": 1.5581, "step": 7529 }, { "epoch": 0.2696653356491844, "grad_norm": 1.6502845287322998, "learning_rate": 0.00017135766952129913, "loss": 1.4082, "step": 7530 }, { "epoch": 0.26970114777875265, "grad_norm": 1.6091289520263672, "learning_rate": 0.00017134954303823588, "loss": 1.5386, "step": 7531 }, { "epoch": 0.269736959908321, "grad_norm": 1.4166127443313599, "learning_rate": 0.00017134141559525654, "loss": 1.4206, "step": 7532 }, { "epoch": 0.26977277203788924, "grad_norm": 1.5068105459213257, "learning_rate": 0.00017133328719247048, "loss": 1.3298, "step": 7533 }, { "epoch": 0.2698085841674575, "grad_norm": 1.52097749710083, "learning_rate": 0.00017132515782998704, "loss": 1.3535, "step": 7534 }, { "epoch": 0.2698443962970258, "grad_norm": 1.7753351926803589, "learning_rate": 0.00017131702750791564, "loss": 1.2874, "step": 7535 }, { "epoch": 0.2698802084265941, "grad_norm": 1.4764755964279175, "learning_rate": 0.0001713088962263656, "loss": 1.6138, "step": 7536 }, { "epoch": 0.2699160205561624, "grad_norm": 1.4922411441802979, "learning_rate": 0.00017130076398544635, "loss": 1.7891, "step": 7537 }, { "epoch": 0.26995183268573064, "grad_norm": 1.7627573013305664, "learning_rate": 0.0001712926307852673, "loss": 1.4984, "step": 7538 }, { "epoch": 0.26998764481529897, "grad_norm": 1.7921463251113892, "learning_rate": 0.00017128449662593786, "loss": 1.5461, "step": 7539 }, { "epoch": 0.27002345694486724, "grad_norm": 1.4542369842529297, "learning_rate": 0.00017127636150756747, "loss": 1.7536, "step": 7540 }, { "epoch": 0.2700592690744355, "grad_norm": 1.5525248050689697, "learning_rate": 0.00017126822543026555, "loss": 1.6489, "step": 7541 }, { "epoch": 0.2700950812040038, "grad_norm": 2.08571720123291, "learning_rate": 0.00017126008839414163, "loss": 1.8435, "step": 7542 }, { "epoch": 0.2701308933335721, "grad_norm": 1.3870586156845093, "learning_rate": 0.00017125195039930508, "loss": 1.3148, "step": 7543 }, { "epoch": 0.27016670546314037, "grad_norm": 3.187460422515869, "learning_rate": 0.0001712438114458655, "loss": 1.6014, "step": 7544 }, { "epoch": 0.27020251759270864, "grad_norm": 1.4459199905395508, "learning_rate": 0.00017123567153393233, "loss": 1.4049, "step": 7545 }, { "epoch": 0.27023832972227696, "grad_norm": 2.3334615230560303, "learning_rate": 0.00017122753066361508, "loss": 1.542, "step": 7546 }, { "epoch": 0.27027414185184523, "grad_norm": 1.4789403676986694, "learning_rate": 0.00017121938883502328, "loss": 1.7571, "step": 7547 }, { "epoch": 0.2703099539814135, "grad_norm": 1.5586892366409302, "learning_rate": 0.00017121124604826645, "loss": 1.6486, "step": 7548 }, { "epoch": 0.27034576611098177, "grad_norm": 1.6130740642547607, "learning_rate": 0.00017120310230345418, "loss": 1.3414, "step": 7549 }, { "epoch": 0.2703815782405501, "grad_norm": 1.3961517810821533, "learning_rate": 0.000171194957600696, "loss": 1.4985, "step": 7550 }, { "epoch": 0.27041739037011836, "grad_norm": 1.2499228715896606, "learning_rate": 0.00017118681194010153, "loss": 1.2782, "step": 7551 }, { "epoch": 0.27045320249968663, "grad_norm": 2.04305362701416, "learning_rate": 0.0001711786653217803, "loss": 1.409, "step": 7552 }, { "epoch": 0.27048901462925495, "grad_norm": 1.6010410785675049, "learning_rate": 0.00017117051774584194, "loss": 1.324, "step": 7553 }, { "epoch": 0.2705248267588232, "grad_norm": 1.6431407928466797, "learning_rate": 0.00017116236921239607, "loss": 1.4098, "step": 7554 }, { "epoch": 0.2705606388883915, "grad_norm": 1.8824855089187622, "learning_rate": 0.00017115421972155234, "loss": 1.498, "step": 7555 }, { "epoch": 0.27059645101795976, "grad_norm": 1.4907480478286743, "learning_rate": 0.00017114606927342036, "loss": 1.4708, "step": 7556 }, { "epoch": 0.2706322631475281, "grad_norm": 1.5933283567428589, "learning_rate": 0.0001711379178681098, "loss": 1.3108, "step": 7557 }, { "epoch": 0.27066807527709635, "grad_norm": 1.7678366899490356, "learning_rate": 0.00017112976550573026, "loss": 1.6437, "step": 7558 }, { "epoch": 0.2707038874066646, "grad_norm": 1.8513715267181396, "learning_rate": 0.00017112161218639152, "loss": 1.6796, "step": 7559 }, { "epoch": 0.27073969953623295, "grad_norm": 1.9389971494674683, "learning_rate": 0.00017111345791020324, "loss": 1.5358, "step": 7560 }, { "epoch": 0.2707755116658012, "grad_norm": 1.4210546016693115, "learning_rate": 0.0001711053026772751, "loss": 1.546, "step": 7561 }, { "epoch": 0.2708113237953695, "grad_norm": 2.9331257343292236, "learning_rate": 0.00017109714648771683, "loss": 1.4633, "step": 7562 }, { "epoch": 0.27084713592493775, "grad_norm": 2.1464321613311768, "learning_rate": 0.00017108898934163814, "loss": 1.9307, "step": 7563 }, { "epoch": 0.2708829480545061, "grad_norm": 1.5199549198150635, "learning_rate": 0.0001710808312391488, "loss": 1.1444, "step": 7564 }, { "epoch": 0.27091876018407435, "grad_norm": 1.5869702100753784, "learning_rate": 0.0001710726721803586, "loss": 1.3033, "step": 7565 }, { "epoch": 0.2709545723136426, "grad_norm": 1.7587640285491943, "learning_rate": 0.00017106451216537723, "loss": 1.5064, "step": 7566 }, { "epoch": 0.27099038444321094, "grad_norm": 1.4721653461456299, "learning_rate": 0.00017105635119431457, "loss": 1.4631, "step": 7567 }, { "epoch": 0.2710261965727792, "grad_norm": 1.7774546146392822, "learning_rate": 0.0001710481892672803, "loss": 1.5663, "step": 7568 }, { "epoch": 0.2710620087023475, "grad_norm": 1.6262949705123901, "learning_rate": 0.00017104002638438433, "loss": 1.7109, "step": 7569 }, { "epoch": 0.27109782083191575, "grad_norm": 1.9079837799072266, "learning_rate": 0.00017103186254573642, "loss": 1.5319, "step": 7570 }, { "epoch": 0.2711336329614841, "grad_norm": 2.4740493297576904, "learning_rate": 0.00017102369775144643, "loss": 1.4753, "step": 7571 }, { "epoch": 0.27116944509105234, "grad_norm": 1.6783044338226318, "learning_rate": 0.0001710155320016242, "loss": 1.6086, "step": 7572 }, { "epoch": 0.2712052572206206, "grad_norm": 1.438635230064392, "learning_rate": 0.00017100736529637958, "loss": 1.4598, "step": 7573 }, { "epoch": 0.27124106935018893, "grad_norm": 1.3612160682678223, "learning_rate": 0.0001709991976358225, "loss": 1.766, "step": 7574 }, { "epoch": 0.2712768814797572, "grad_norm": 2.465081214904785, "learning_rate": 0.00017099102902006275, "loss": 1.5949, "step": 7575 }, { "epoch": 0.2713126936093255, "grad_norm": 2.009199380874634, "learning_rate": 0.00017098285944921028, "loss": 1.9, "step": 7576 }, { "epoch": 0.27134850573889374, "grad_norm": 1.7520637512207031, "learning_rate": 0.00017097468892337503, "loss": 1.6389, "step": 7577 }, { "epoch": 0.27138431786846207, "grad_norm": 1.6367089748382568, "learning_rate": 0.00017096651744266686, "loss": 1.7878, "step": 7578 }, { "epoch": 0.27142012999803034, "grad_norm": 1.7914072275161743, "learning_rate": 0.00017095834500719574, "loss": 1.5384, "step": 7579 }, { "epoch": 0.2714559421275986, "grad_norm": 1.9473427534103394, "learning_rate": 0.00017095017161707164, "loss": 1.5749, "step": 7580 }, { "epoch": 0.27149175425716693, "grad_norm": 1.9787449836730957, "learning_rate": 0.00017094199727240447, "loss": 1.6192, "step": 7581 }, { "epoch": 0.2715275663867352, "grad_norm": 1.2734183073043823, "learning_rate": 0.00017093382197330427, "loss": 1.3988, "step": 7582 }, { "epoch": 0.27156337851630347, "grad_norm": 1.445749044418335, "learning_rate": 0.00017092564571988096, "loss": 1.4681, "step": 7583 }, { "epoch": 0.27159919064587174, "grad_norm": 1.8988313674926758, "learning_rate": 0.0001709174685122446, "loss": 1.3218, "step": 7584 }, { "epoch": 0.27163500277544006, "grad_norm": 1.7073367834091187, "learning_rate": 0.00017090929035050513, "loss": 1.6704, "step": 7585 }, { "epoch": 0.27167081490500833, "grad_norm": 1.4656105041503906, "learning_rate": 0.00017090111123477266, "loss": 1.4475, "step": 7586 }, { "epoch": 0.2717066270345766, "grad_norm": 2.0253167152404785, "learning_rate": 0.0001708929311651572, "loss": 1.7153, "step": 7587 }, { "epoch": 0.2717424391641449, "grad_norm": 1.6637212038040161, "learning_rate": 0.0001708847501417688, "loss": 1.4888, "step": 7588 }, { "epoch": 0.2717782512937132, "grad_norm": 1.6039817333221436, "learning_rate": 0.00017087656816471754, "loss": 1.2485, "step": 7589 }, { "epoch": 0.27181406342328146, "grad_norm": 1.811334490776062, "learning_rate": 0.00017086838523411343, "loss": 1.7239, "step": 7590 }, { "epoch": 0.27184987555284973, "grad_norm": 1.2404311895370483, "learning_rate": 0.00017086020135006664, "loss": 1.2436, "step": 7591 }, { "epoch": 0.27188568768241805, "grad_norm": 1.3640433549880981, "learning_rate": 0.00017085201651268722, "loss": 1.4845, "step": 7592 }, { "epoch": 0.2719214998119863, "grad_norm": 2.0693013668060303, "learning_rate": 0.00017084383072208534, "loss": 1.7518, "step": 7593 }, { "epoch": 0.2719573119415546, "grad_norm": 1.524749994277954, "learning_rate": 0.00017083564397837108, "loss": 1.6015, "step": 7594 }, { "epoch": 0.2719931240711229, "grad_norm": 1.4143753051757812, "learning_rate": 0.00017082745628165463, "loss": 1.6716, "step": 7595 }, { "epoch": 0.2720289362006912, "grad_norm": 1.6675052642822266, "learning_rate": 0.0001708192676320461, "loss": 1.3527, "step": 7596 }, { "epoch": 0.27206474833025945, "grad_norm": 1.9079591035842896, "learning_rate": 0.00017081107802965564, "loss": 1.601, "step": 7597 }, { "epoch": 0.2721005604598277, "grad_norm": 1.47043776512146, "learning_rate": 0.0001708028874745935, "loss": 1.6689, "step": 7598 }, { "epoch": 0.27213637258939605, "grad_norm": 1.9490792751312256, "learning_rate": 0.0001707946959669698, "loss": 1.3982, "step": 7599 }, { "epoch": 0.2721721847189643, "grad_norm": 1.7489452362060547, "learning_rate": 0.00017078650350689482, "loss": 1.4841, "step": 7600 }, { "epoch": 0.2722079968485326, "grad_norm": 1.3652783632278442, "learning_rate": 0.00017077831009447878, "loss": 1.4928, "step": 7601 }, { "epoch": 0.2722438089781009, "grad_norm": 1.3833941221237183, "learning_rate": 0.00017077011572983183, "loss": 1.2949, "step": 7602 }, { "epoch": 0.2722796211076692, "grad_norm": 1.9464653730392456, "learning_rate": 0.00017076192041306425, "loss": 1.6098, "step": 7603 }, { "epoch": 0.27231543323723745, "grad_norm": 1.9903013706207275, "learning_rate": 0.00017075372414428633, "loss": 1.9585, "step": 7604 }, { "epoch": 0.2723512453668057, "grad_norm": 1.821960210800171, "learning_rate": 0.00017074552692360832, "loss": 1.3332, "step": 7605 }, { "epoch": 0.27238705749637404, "grad_norm": 2.120266914367676, "learning_rate": 0.00017073732875114045, "loss": 1.465, "step": 7606 }, { "epoch": 0.2724228696259423, "grad_norm": 1.6445937156677246, "learning_rate": 0.0001707291296269931, "loss": 1.5826, "step": 7607 }, { "epoch": 0.2724586817555106, "grad_norm": 1.6000337600708008, "learning_rate": 0.00017072092955127657, "loss": 1.4611, "step": 7608 }, { "epoch": 0.2724944938850789, "grad_norm": 1.9415369033813477, "learning_rate": 0.00017071272852410113, "loss": 1.5284, "step": 7609 }, { "epoch": 0.2725303060146472, "grad_norm": 1.6123709678649902, "learning_rate": 0.00017070452654557717, "loss": 1.5851, "step": 7610 }, { "epoch": 0.27256611814421544, "grad_norm": 1.5228267908096313, "learning_rate": 0.00017069632361581497, "loss": 1.8117, "step": 7611 }, { "epoch": 0.2726019302737837, "grad_norm": 1.7121150493621826, "learning_rate": 0.00017068811973492497, "loss": 1.6039, "step": 7612 }, { "epoch": 0.27263774240335203, "grad_norm": 1.8781545162200928, "learning_rate": 0.00017067991490301744, "loss": 1.5765, "step": 7613 }, { "epoch": 0.2726735545329203, "grad_norm": 2.0894546508789062, "learning_rate": 0.00017067170912020286, "loss": 1.5713, "step": 7614 }, { "epoch": 0.2727093666624886, "grad_norm": 1.5253492593765259, "learning_rate": 0.0001706635023865916, "loss": 1.4626, "step": 7615 }, { "epoch": 0.2727451787920569, "grad_norm": 1.6403868198394775, "learning_rate": 0.00017065529470229403, "loss": 1.5156, "step": 7616 }, { "epoch": 0.27278099092162517, "grad_norm": 1.4445627927780151, "learning_rate": 0.00017064708606742067, "loss": 1.7814, "step": 7617 }, { "epoch": 0.27281680305119343, "grad_norm": 1.6161423921585083, "learning_rate": 0.00017063887648208185, "loss": 1.8426, "step": 7618 }, { "epoch": 0.2728526151807617, "grad_norm": 2.159152030944824, "learning_rate": 0.00017063066594638805, "loss": 1.6711, "step": 7619 }, { "epoch": 0.27288842731033003, "grad_norm": 1.585121989250183, "learning_rate": 0.0001706224544604498, "loss": 1.6355, "step": 7620 }, { "epoch": 0.2729242394398983, "grad_norm": 2.4911491870880127, "learning_rate": 0.00017061424202437748, "loss": 1.8799, "step": 7621 }, { "epoch": 0.27296005156946657, "grad_norm": 1.3590527772903442, "learning_rate": 0.00017060602863828165, "loss": 1.3842, "step": 7622 }, { "epoch": 0.2729958636990349, "grad_norm": 1.6807591915130615, "learning_rate": 0.00017059781430227275, "loss": 1.8145, "step": 7623 }, { "epoch": 0.27303167582860316, "grad_norm": 1.9049506187438965, "learning_rate": 0.00017058959901646134, "loss": 1.5613, "step": 7624 }, { "epoch": 0.27306748795817143, "grad_norm": 1.9668833017349243, "learning_rate": 0.00017058138278095792, "loss": 1.3905, "step": 7625 }, { "epoch": 0.2731033000877397, "grad_norm": 1.8975080251693726, "learning_rate": 0.00017057316559587307, "loss": 1.6166, "step": 7626 }, { "epoch": 0.273139112217308, "grad_norm": 1.7440699338912964, "learning_rate": 0.00017056494746131725, "loss": 1.5235, "step": 7627 }, { "epoch": 0.2731749243468763, "grad_norm": 1.5745868682861328, "learning_rate": 0.00017055672837740113, "loss": 1.4461, "step": 7628 }, { "epoch": 0.27321073647644456, "grad_norm": 1.4910067319869995, "learning_rate": 0.00017054850834423522, "loss": 1.6589, "step": 7629 }, { "epoch": 0.27324654860601283, "grad_norm": 1.9283604621887207, "learning_rate": 0.00017054028736193013, "loss": 1.3877, "step": 7630 }, { "epoch": 0.27328236073558115, "grad_norm": 1.5712474584579468, "learning_rate": 0.00017053206543059647, "loss": 1.6484, "step": 7631 }, { "epoch": 0.2733181728651494, "grad_norm": 1.4814081192016602, "learning_rate": 0.00017052384255034485, "loss": 1.58, "step": 7632 }, { "epoch": 0.2733539849947177, "grad_norm": 2.332150936126709, "learning_rate": 0.00017051561872128592, "loss": 1.4022, "step": 7633 }, { "epoch": 0.273389797124286, "grad_norm": 1.6437220573425293, "learning_rate": 0.00017050739394353028, "loss": 1.4863, "step": 7634 }, { "epoch": 0.2734256092538543, "grad_norm": 2.036505937576294, "learning_rate": 0.00017049916821718861, "loss": 1.5958, "step": 7635 }, { "epoch": 0.27346142138342255, "grad_norm": 1.2595261335372925, "learning_rate": 0.00017049094154237155, "loss": 1.4669, "step": 7636 }, { "epoch": 0.2734972335129908, "grad_norm": 1.5755056142807007, "learning_rate": 0.0001704827139191898, "loss": 1.6032, "step": 7637 }, { "epoch": 0.27353304564255915, "grad_norm": 1.5014159679412842, "learning_rate": 0.00017047448534775406, "loss": 1.6173, "step": 7638 }, { "epoch": 0.2735688577721274, "grad_norm": 1.7377209663391113, "learning_rate": 0.00017046625582817503, "loss": 1.4034, "step": 7639 }, { "epoch": 0.2736046699016957, "grad_norm": 1.9546681642532349, "learning_rate": 0.00017045802536056344, "loss": 1.7961, "step": 7640 }, { "epoch": 0.273640482031264, "grad_norm": 1.5216879844665527, "learning_rate": 0.00017044979394502995, "loss": 1.6273, "step": 7641 }, { "epoch": 0.2736762941608323, "grad_norm": 2.2344608306884766, "learning_rate": 0.0001704415615816854, "loss": 1.5878, "step": 7642 }, { "epoch": 0.27371210629040055, "grad_norm": 1.5303434133529663, "learning_rate": 0.0001704333282706405, "loss": 1.5562, "step": 7643 }, { "epoch": 0.2737479184199688, "grad_norm": 2.065340280532837, "learning_rate": 0.00017042509401200598, "loss": 1.6453, "step": 7644 }, { "epoch": 0.27378373054953714, "grad_norm": 1.7603098154067993, "learning_rate": 0.00017041685880589272, "loss": 1.7976, "step": 7645 }, { "epoch": 0.2738195426791054, "grad_norm": 1.29735267162323, "learning_rate": 0.0001704086226524114, "loss": 1.4381, "step": 7646 }, { "epoch": 0.2738553548086737, "grad_norm": 1.7243160009384155, "learning_rate": 0.0001704003855516729, "loss": 1.6664, "step": 7647 }, { "epoch": 0.273891166938242, "grad_norm": 2.5377590656280518, "learning_rate": 0.00017039214750378805, "loss": 1.7078, "step": 7648 }, { "epoch": 0.2739269790678103, "grad_norm": 2.229074478149414, "learning_rate": 0.00017038390850886766, "loss": 1.4732, "step": 7649 }, { "epoch": 0.27396279119737854, "grad_norm": 1.3973140716552734, "learning_rate": 0.00017037566856702255, "loss": 1.3579, "step": 7650 }, { "epoch": 0.2739986033269468, "grad_norm": 1.4385688304901123, "learning_rate": 0.00017036742767836355, "loss": 1.6784, "step": 7651 }, { "epoch": 0.27403441545651513, "grad_norm": 1.4046844244003296, "learning_rate": 0.00017035918584300163, "loss": 1.4343, "step": 7652 }, { "epoch": 0.2740702275860834, "grad_norm": 1.4777336120605469, "learning_rate": 0.00017035094306104762, "loss": 1.752, "step": 7653 }, { "epoch": 0.2741060397156517, "grad_norm": 1.6677403450012207, "learning_rate": 0.0001703426993326124, "loss": 1.5784, "step": 7654 }, { "epoch": 0.27414185184522, "grad_norm": 1.8168667554855347, "learning_rate": 0.0001703344546578069, "loss": 1.7299, "step": 7655 }, { "epoch": 0.27417766397478827, "grad_norm": 1.6721216440200806, "learning_rate": 0.00017032620903674207, "loss": 1.5574, "step": 7656 }, { "epoch": 0.27421347610435653, "grad_norm": 1.7680671215057373, "learning_rate": 0.0001703179624695288, "loss": 1.3656, "step": 7657 }, { "epoch": 0.2742492882339248, "grad_norm": 2.2017982006073, "learning_rate": 0.00017030971495627802, "loss": 1.5316, "step": 7658 }, { "epoch": 0.27428510036349313, "grad_norm": 1.4911608695983887, "learning_rate": 0.00017030146649710072, "loss": 1.6055, "step": 7659 }, { "epoch": 0.2743209124930614, "grad_norm": 1.9482650756835938, "learning_rate": 0.00017029321709210787, "loss": 1.7288, "step": 7660 }, { "epoch": 0.27435672462262967, "grad_norm": 1.816979169845581, "learning_rate": 0.00017028496674141051, "loss": 1.1743, "step": 7661 }, { "epoch": 0.274392536752198, "grad_norm": 1.9508891105651855, "learning_rate": 0.0001702767154451195, "loss": 1.5504, "step": 7662 }, { "epoch": 0.27442834888176626, "grad_norm": 1.4764585494995117, "learning_rate": 0.000170268463203346, "loss": 1.5337, "step": 7663 }, { "epoch": 0.27446416101133453, "grad_norm": 1.4491671323776245, "learning_rate": 0.00017026021001620095, "loss": 1.6598, "step": 7664 }, { "epoch": 0.2744999731409028, "grad_norm": 1.9225811958312988, "learning_rate": 0.00017025195588379538, "loss": 1.4929, "step": 7665 }, { "epoch": 0.2745357852704711, "grad_norm": 1.355978012084961, "learning_rate": 0.0001702437008062404, "loss": 1.1573, "step": 7666 }, { "epoch": 0.2745715974000394, "grad_norm": 1.796846866607666, "learning_rate": 0.00017023544478364698, "loss": 1.3795, "step": 7667 }, { "epoch": 0.27460740952960766, "grad_norm": 1.581048846244812, "learning_rate": 0.0001702271878161263, "loss": 1.365, "step": 7668 }, { "epoch": 0.274643221659176, "grad_norm": 1.55429208278656, "learning_rate": 0.0001702189299037894, "loss": 1.5851, "step": 7669 }, { "epoch": 0.27467903378874425, "grad_norm": 1.660404920578003, "learning_rate": 0.00017021067104674734, "loss": 2.0028, "step": 7670 }, { "epoch": 0.2747148459183125, "grad_norm": 2.531282663345337, "learning_rate": 0.00017020241124511128, "loss": 1.6508, "step": 7671 }, { "epoch": 0.2747506580478808, "grad_norm": 1.8213257789611816, "learning_rate": 0.0001701941504989923, "loss": 1.4164, "step": 7672 }, { "epoch": 0.2747864701774491, "grad_norm": 1.5226930379867554, "learning_rate": 0.00017018588880850162, "loss": 1.7048, "step": 7673 }, { "epoch": 0.2748222823070174, "grad_norm": 1.236707329750061, "learning_rate": 0.0001701776261737503, "loss": 1.4652, "step": 7674 }, { "epoch": 0.27485809443658565, "grad_norm": 1.5473086833953857, "learning_rate": 0.00017016936259484953, "loss": 1.5635, "step": 7675 }, { "epoch": 0.274893906566154, "grad_norm": 1.5289727449417114, "learning_rate": 0.00017016109807191056, "loss": 1.5221, "step": 7676 }, { "epoch": 0.27492971869572225, "grad_norm": 1.2557204961776733, "learning_rate": 0.00017015283260504447, "loss": 1.5362, "step": 7677 }, { "epoch": 0.2749655308252905, "grad_norm": 1.6917392015457153, "learning_rate": 0.00017014456619436253, "loss": 1.5005, "step": 7678 }, { "epoch": 0.2750013429548588, "grad_norm": 1.553214192390442, "learning_rate": 0.00017013629883997594, "loss": 1.6225, "step": 7679 }, { "epoch": 0.2750371550844271, "grad_norm": 1.3000906705856323, "learning_rate": 0.00017012803054199587, "loss": 1.6618, "step": 7680 }, { "epoch": 0.2750729672139954, "grad_norm": 1.7456116676330566, "learning_rate": 0.00017011976130053367, "loss": 1.4712, "step": 7681 }, { "epoch": 0.27510877934356365, "grad_norm": 1.459320068359375, "learning_rate": 0.00017011149111570051, "loss": 1.6217, "step": 7682 }, { "epoch": 0.27514459147313197, "grad_norm": 2.354222297668457, "learning_rate": 0.00017010321998760762, "loss": 1.6253, "step": 7683 }, { "epoch": 0.27518040360270024, "grad_norm": 1.62624192237854, "learning_rate": 0.0001700949479163664, "loss": 1.4179, "step": 7684 }, { "epoch": 0.2752162157322685, "grad_norm": 1.8651334047317505, "learning_rate": 0.00017008667490208803, "loss": 1.4862, "step": 7685 }, { "epoch": 0.2752520278618368, "grad_norm": 1.6018571853637695, "learning_rate": 0.00017007840094488387, "loss": 1.7953, "step": 7686 }, { "epoch": 0.2752878399914051, "grad_norm": 1.353165626525879, "learning_rate": 0.00017007012604486525, "loss": 1.6099, "step": 7687 }, { "epoch": 0.2753236521209734, "grad_norm": 1.3602633476257324, "learning_rate": 0.0001700618502021434, "loss": 1.5703, "step": 7688 }, { "epoch": 0.27535946425054164, "grad_norm": 1.2508971691131592, "learning_rate": 0.00017005357341682979, "loss": 1.6003, "step": 7689 }, { "epoch": 0.27539527638010997, "grad_norm": 1.8892033100128174, "learning_rate": 0.0001700452956890357, "loss": 1.7102, "step": 7690 }, { "epoch": 0.27543108850967823, "grad_norm": 1.4698898792266846, "learning_rate": 0.0001700370170188725, "loss": 1.6364, "step": 7691 }, { "epoch": 0.2754669006392465, "grad_norm": 1.6167817115783691, "learning_rate": 0.00017002873740645157, "loss": 1.7467, "step": 7692 }, { "epoch": 0.2755027127688148, "grad_norm": 1.442500352859497, "learning_rate": 0.00017002045685188431, "loss": 1.4752, "step": 7693 }, { "epoch": 0.2755385248983831, "grad_norm": 1.7867764234542847, "learning_rate": 0.00017001217535528215, "loss": 1.5237, "step": 7694 }, { "epoch": 0.27557433702795137, "grad_norm": 1.4577064514160156, "learning_rate": 0.00017000389291675644, "loss": 1.683, "step": 7695 }, { "epoch": 0.27561014915751963, "grad_norm": 1.7021827697753906, "learning_rate": 0.00016999560953641867, "loss": 1.7309, "step": 7696 }, { "epoch": 0.27564596128708796, "grad_norm": 1.3312740325927734, "learning_rate": 0.00016998732521438024, "loss": 1.2283, "step": 7697 }, { "epoch": 0.27568177341665623, "grad_norm": 1.972090482711792, "learning_rate": 0.00016997903995075265, "loss": 1.3293, "step": 7698 }, { "epoch": 0.2757175855462245, "grad_norm": 1.9187233448028564, "learning_rate": 0.00016997075374564733, "loss": 1.3078, "step": 7699 }, { "epoch": 0.27575339767579277, "grad_norm": 1.3226217031478882, "learning_rate": 0.00016996246659917578, "loss": 1.3863, "step": 7700 }, { "epoch": 0.2757892098053611, "grad_norm": 2.0401716232299805, "learning_rate": 0.0001699541785114495, "loss": 1.4268, "step": 7701 }, { "epoch": 0.27582502193492936, "grad_norm": 1.7860567569732666, "learning_rate": 0.00016994588948257997, "loss": 1.358, "step": 7702 }, { "epoch": 0.27586083406449763, "grad_norm": 1.6787052154541016, "learning_rate": 0.0001699375995126787, "loss": 1.6829, "step": 7703 }, { "epoch": 0.27589664619406595, "grad_norm": 1.7264974117279053, "learning_rate": 0.00016992930860185726, "loss": 1.5136, "step": 7704 }, { "epoch": 0.2759324583236342, "grad_norm": 1.6791878938674927, "learning_rate": 0.0001699210167502272, "loss": 1.3216, "step": 7705 }, { "epoch": 0.2759682704532025, "grad_norm": 1.2904554605484009, "learning_rate": 0.00016991272395790007, "loss": 1.3571, "step": 7706 }, { "epoch": 0.27600408258277076, "grad_norm": 2.196139335632324, "learning_rate": 0.00016990443022498735, "loss": 1.5075, "step": 7707 }, { "epoch": 0.2760398947123391, "grad_norm": 1.6233375072479248, "learning_rate": 0.0001698961355516007, "loss": 1.5892, "step": 7708 }, { "epoch": 0.27607570684190735, "grad_norm": 1.7111704349517822, "learning_rate": 0.00016988783993785177, "loss": 1.6294, "step": 7709 }, { "epoch": 0.2761115189714756, "grad_norm": 1.633927345275879, "learning_rate": 0.00016987954338385202, "loss": 1.3068, "step": 7710 }, { "epoch": 0.27614733110104395, "grad_norm": 1.6509286165237427, "learning_rate": 0.0001698712458897132, "loss": 1.5484, "step": 7711 }, { "epoch": 0.2761831432306122, "grad_norm": 1.8952949047088623, "learning_rate": 0.0001698629474555469, "loss": 1.4961, "step": 7712 }, { "epoch": 0.2762189553601805, "grad_norm": 2.397242546081543, "learning_rate": 0.00016985464808146473, "loss": 1.8338, "step": 7713 }, { "epoch": 0.27625476748974875, "grad_norm": 1.9433248043060303, "learning_rate": 0.0001698463477675784, "loss": 1.5395, "step": 7714 }, { "epoch": 0.2762905796193171, "grad_norm": 2.032348394393921, "learning_rate": 0.00016983804651399956, "loss": 1.5128, "step": 7715 }, { "epoch": 0.27632639174888535, "grad_norm": 2.3550405502319336, "learning_rate": 0.00016982974432083986, "loss": 1.4864, "step": 7716 }, { "epoch": 0.2763622038784536, "grad_norm": 1.410760521888733, "learning_rate": 0.00016982144118821103, "loss": 1.6236, "step": 7717 }, { "epoch": 0.27639801600802194, "grad_norm": 1.9511107206344604, "learning_rate": 0.0001698131371162248, "loss": 1.7513, "step": 7718 }, { "epoch": 0.2764338281375902, "grad_norm": 1.6737018823623657, "learning_rate": 0.00016980483210499286, "loss": 1.5436, "step": 7719 }, { "epoch": 0.2764696402671585, "grad_norm": 1.807410478591919, "learning_rate": 0.00016979652615462692, "loss": 1.773, "step": 7720 }, { "epoch": 0.27650545239672675, "grad_norm": 1.6671122312545776, "learning_rate": 0.00016978821926523873, "loss": 1.5451, "step": 7721 }, { "epoch": 0.27654126452629507, "grad_norm": 1.9303603172302246, "learning_rate": 0.00016977991143694014, "loss": 1.6225, "step": 7722 }, { "epoch": 0.27657707665586334, "grad_norm": 1.4524554014205933, "learning_rate": 0.00016977160266984283, "loss": 1.4761, "step": 7723 }, { "epoch": 0.2766128887854316, "grad_norm": 1.716299295425415, "learning_rate": 0.00016976329296405855, "loss": 1.3507, "step": 7724 }, { "epoch": 0.27664870091499993, "grad_norm": 1.3396046161651611, "learning_rate": 0.0001697549823196992, "loss": 1.7153, "step": 7725 }, { "epoch": 0.2766845130445682, "grad_norm": 1.991355299949646, "learning_rate": 0.00016974667073687655, "loss": 1.648, "step": 7726 }, { "epoch": 0.27672032517413647, "grad_norm": 1.623319149017334, "learning_rate": 0.00016973835821570236, "loss": 1.5506, "step": 7727 }, { "epoch": 0.27675613730370474, "grad_norm": 1.4190425872802734, "learning_rate": 0.00016973004475628856, "loss": 1.4248, "step": 7728 }, { "epoch": 0.27679194943327307, "grad_norm": 1.6432379484176636, "learning_rate": 0.00016972173035874693, "loss": 1.7124, "step": 7729 }, { "epoch": 0.27682776156284133, "grad_norm": 1.4056947231292725, "learning_rate": 0.00016971341502318936, "loss": 1.4815, "step": 7730 }, { "epoch": 0.2768635736924096, "grad_norm": 1.9628760814666748, "learning_rate": 0.00016970509874972774, "loss": 1.7848, "step": 7731 }, { "epoch": 0.27689938582197793, "grad_norm": 1.8493858575820923, "learning_rate": 0.0001696967815384739, "loss": 1.6528, "step": 7732 }, { "epoch": 0.2769351979515462, "grad_norm": 1.7261875867843628, "learning_rate": 0.0001696884633895398, "loss": 1.5981, "step": 7733 }, { "epoch": 0.27697101008111447, "grad_norm": 1.6887271404266357, "learning_rate": 0.00016968014430303728, "loss": 1.6599, "step": 7734 }, { "epoch": 0.27700682221068273, "grad_norm": 1.777239203453064, "learning_rate": 0.0001696718242790783, "loss": 1.4756, "step": 7735 }, { "epoch": 0.27704263434025106, "grad_norm": 1.546141266822815, "learning_rate": 0.0001696635033177748, "loss": 1.551, "step": 7736 }, { "epoch": 0.27707844646981933, "grad_norm": 1.5476120710372925, "learning_rate": 0.00016965518141923874, "loss": 1.6505, "step": 7737 }, { "epoch": 0.2771142585993876, "grad_norm": 1.809834361076355, "learning_rate": 0.00016964685858358202, "loss": 1.8532, "step": 7738 }, { "epoch": 0.2771500707289559, "grad_norm": 2.0858800411224365, "learning_rate": 0.0001696385348109167, "loss": 1.7213, "step": 7739 }, { "epoch": 0.2771858828585242, "grad_norm": 1.580114722251892, "learning_rate": 0.0001696302101013547, "loss": 1.6748, "step": 7740 }, { "epoch": 0.27722169498809246, "grad_norm": 1.333909273147583, "learning_rate": 0.00016962188445500807, "loss": 1.632, "step": 7741 }, { "epoch": 0.27725750711766073, "grad_norm": 1.7078574895858765, "learning_rate": 0.00016961355787198875, "loss": 1.7498, "step": 7742 }, { "epoch": 0.27729331924722905, "grad_norm": 1.3296380043029785, "learning_rate": 0.00016960523035240883, "loss": 1.5679, "step": 7743 }, { "epoch": 0.2773291313767973, "grad_norm": 1.2145541906356812, "learning_rate": 0.0001695969018963803, "loss": 1.5629, "step": 7744 }, { "epoch": 0.2773649435063656, "grad_norm": 1.5315901041030884, "learning_rate": 0.00016958857250401525, "loss": 1.7075, "step": 7745 }, { "epoch": 0.2774007556359339, "grad_norm": 1.5283602476119995, "learning_rate": 0.0001695802421754257, "loss": 1.7226, "step": 7746 }, { "epoch": 0.2774365677655022, "grad_norm": 1.7161786556243896, "learning_rate": 0.00016957191091072376, "loss": 1.4375, "step": 7747 }, { "epoch": 0.27747237989507045, "grad_norm": 1.6843448877334595, "learning_rate": 0.0001695635787100215, "loss": 1.3808, "step": 7748 }, { "epoch": 0.2775081920246387, "grad_norm": 1.336690068244934, "learning_rate": 0.000169555245573431, "loss": 1.3474, "step": 7749 }, { "epoch": 0.27754400415420705, "grad_norm": 1.7416538000106812, "learning_rate": 0.0001695469115010644, "loss": 1.2874, "step": 7750 }, { "epoch": 0.2775798162837753, "grad_norm": 1.2422138452529907, "learning_rate": 0.00016953857649303381, "loss": 1.6665, "step": 7751 }, { "epoch": 0.2776156284133436, "grad_norm": 1.4898710250854492, "learning_rate": 0.00016953024054945138, "loss": 1.3389, "step": 7752 }, { "epoch": 0.2776514405429119, "grad_norm": 2.7302865982055664, "learning_rate": 0.00016952190367042926, "loss": 1.5453, "step": 7753 }, { "epoch": 0.2776872526724802, "grad_norm": 3.756218671798706, "learning_rate": 0.0001695135658560796, "loss": 2.0241, "step": 7754 }, { "epoch": 0.27772306480204845, "grad_norm": 1.5968868732452393, "learning_rate": 0.00016950522710651455, "loss": 1.335, "step": 7755 }, { "epoch": 0.2777588769316167, "grad_norm": 1.5735868215560913, "learning_rate": 0.00016949688742184637, "loss": 1.5448, "step": 7756 }, { "epoch": 0.27779468906118504, "grad_norm": 1.4668728113174438, "learning_rate": 0.0001694885468021872, "loss": 1.5775, "step": 7757 }, { "epoch": 0.2778305011907533, "grad_norm": 1.89346182346344, "learning_rate": 0.00016948020524764924, "loss": 1.3931, "step": 7758 }, { "epoch": 0.2778663133203216, "grad_norm": 1.6620376110076904, "learning_rate": 0.00016947186275834475, "loss": 1.6433, "step": 7759 }, { "epoch": 0.2779021254498899, "grad_norm": 1.8120241165161133, "learning_rate": 0.00016946351933438595, "loss": 1.418, "step": 7760 }, { "epoch": 0.27793793757945817, "grad_norm": 1.5897480249404907, "learning_rate": 0.00016945517497588512, "loss": 1.5138, "step": 7761 }, { "epoch": 0.27797374970902644, "grad_norm": 2.4386966228485107, "learning_rate": 0.00016944682968295452, "loss": 1.5283, "step": 7762 }, { "epoch": 0.2780095618385947, "grad_norm": 1.8611398935317993, "learning_rate": 0.00016943848345570638, "loss": 1.6865, "step": 7763 }, { "epoch": 0.27804537396816303, "grad_norm": 1.441057562828064, "learning_rate": 0.00016943013629425302, "loss": 1.453, "step": 7764 }, { "epoch": 0.2780811860977313, "grad_norm": 1.4636635780334473, "learning_rate": 0.00016942178819870672, "loss": 1.2875, "step": 7765 }, { "epoch": 0.27811699822729957, "grad_norm": 1.454779028892517, "learning_rate": 0.00016941343916917982, "loss": 1.6621, "step": 7766 }, { "epoch": 0.2781528103568679, "grad_norm": 1.4269800186157227, "learning_rate": 0.00016940508920578463, "loss": 1.7387, "step": 7767 }, { "epoch": 0.27818862248643617, "grad_norm": 2.06813645362854, "learning_rate": 0.00016939673830863348, "loss": 1.4947, "step": 7768 }, { "epoch": 0.27822443461600443, "grad_norm": 1.9147825241088867, "learning_rate": 0.00016938838647783877, "loss": 1.5197, "step": 7769 }, { "epoch": 0.2782602467455727, "grad_norm": 2.2611420154571533, "learning_rate": 0.00016938003371351278, "loss": 1.7869, "step": 7770 }, { "epoch": 0.27829605887514103, "grad_norm": 2.2714648246765137, "learning_rate": 0.00016937168001576795, "loss": 1.4491, "step": 7771 }, { "epoch": 0.2783318710047093, "grad_norm": 1.5648062229156494, "learning_rate": 0.00016936332538471666, "loss": 1.3759, "step": 7772 }, { "epoch": 0.27836768313427757, "grad_norm": 1.8871150016784668, "learning_rate": 0.00016935496982047128, "loss": 1.9112, "step": 7773 }, { "epoch": 0.2784034952638459, "grad_norm": 1.4186395406723022, "learning_rate": 0.00016934661332314424, "loss": 1.7234, "step": 7774 }, { "epoch": 0.27843930739341416, "grad_norm": 1.551112174987793, "learning_rate": 0.000169338255892848, "loss": 1.5181, "step": 7775 }, { "epoch": 0.27847511952298243, "grad_norm": 1.92075514793396, "learning_rate": 0.00016932989752969495, "loss": 1.5922, "step": 7776 }, { "epoch": 0.2785109316525507, "grad_norm": 1.918872356414795, "learning_rate": 0.00016932153823379754, "loss": 1.6461, "step": 7777 }, { "epoch": 0.278546743782119, "grad_norm": 1.9076799154281616, "learning_rate": 0.00016931317800526828, "loss": 1.9765, "step": 7778 }, { "epoch": 0.2785825559116873, "grad_norm": 1.9921611547470093, "learning_rate": 0.0001693048168442196, "loss": 1.6124, "step": 7779 }, { "epoch": 0.27861836804125556, "grad_norm": 1.2875360250473022, "learning_rate": 0.000169296454750764, "loss": 1.4644, "step": 7780 }, { "epoch": 0.2786541801708239, "grad_norm": 1.5666717290878296, "learning_rate": 0.00016928809172501397, "loss": 1.8456, "step": 7781 }, { "epoch": 0.27868999230039215, "grad_norm": 2.1061506271362305, "learning_rate": 0.00016927972776708208, "loss": 1.3571, "step": 7782 }, { "epoch": 0.2787258044299604, "grad_norm": 1.7013448476791382, "learning_rate": 0.0001692713628770808, "loss": 1.1378, "step": 7783 }, { "epoch": 0.2787616165595287, "grad_norm": 2.2516982555389404, "learning_rate": 0.00016926299705512273, "loss": 1.2538, "step": 7784 }, { "epoch": 0.278797428689097, "grad_norm": 1.5621269941329956, "learning_rate": 0.0001692546303013203, "loss": 1.8246, "step": 7785 }, { "epoch": 0.2788332408186653, "grad_norm": 2.3607735633850098, "learning_rate": 0.0001692462626157862, "loss": 1.4497, "step": 7786 }, { "epoch": 0.27886905294823355, "grad_norm": 2.1050877571105957, "learning_rate": 0.00016923789399863294, "loss": 1.4979, "step": 7787 }, { "epoch": 0.2789048650778019, "grad_norm": 1.711255431175232, "learning_rate": 0.00016922952444997313, "loss": 1.5194, "step": 7788 }, { "epoch": 0.27894067720737015, "grad_norm": 1.6491427421569824, "learning_rate": 0.00016922115396991939, "loss": 1.4156, "step": 7789 }, { "epoch": 0.2789764893369384, "grad_norm": 1.5196592807769775, "learning_rate": 0.00016921278255858425, "loss": 1.4432, "step": 7790 }, { "epoch": 0.2790123014665067, "grad_norm": 1.4955500364303589, "learning_rate": 0.00016920441021608048, "loss": 1.806, "step": 7791 }, { "epoch": 0.279048113596075, "grad_norm": 1.6842684745788574, "learning_rate": 0.0001691960369425206, "loss": 1.628, "step": 7792 }, { "epoch": 0.2790839257256433, "grad_norm": 1.859779953956604, "learning_rate": 0.0001691876627380173, "loss": 1.4083, "step": 7793 }, { "epoch": 0.27911973785521155, "grad_norm": 1.8693050146102905, "learning_rate": 0.00016917928760268325, "loss": 1.6231, "step": 7794 }, { "epoch": 0.27915554998477987, "grad_norm": 1.3143736124038696, "learning_rate": 0.0001691709115366311, "loss": 1.5244, "step": 7795 }, { "epoch": 0.27919136211434814, "grad_norm": 1.5413780212402344, "learning_rate": 0.00016916253453997358, "loss": 1.6463, "step": 7796 }, { "epoch": 0.2792271742439164, "grad_norm": 1.8122531175613403, "learning_rate": 0.00016915415661282335, "loss": 1.5876, "step": 7797 }, { "epoch": 0.2792629863734847, "grad_norm": 2.2389962673187256, "learning_rate": 0.00016914577775529316, "loss": 1.3309, "step": 7798 }, { "epoch": 0.279298798503053, "grad_norm": 1.547379732131958, "learning_rate": 0.0001691373979674957, "loss": 1.9148, "step": 7799 }, { "epoch": 0.27933461063262127, "grad_norm": 1.9751660823822021, "learning_rate": 0.00016912901724954377, "loss": 1.7473, "step": 7800 }, { "epoch": 0.27937042276218954, "grad_norm": 1.5748672485351562, "learning_rate": 0.00016912063560155005, "loss": 1.5901, "step": 7801 }, { "epoch": 0.27940623489175787, "grad_norm": 2.0997776985168457, "learning_rate": 0.00016911225302362738, "loss": 1.5381, "step": 7802 }, { "epoch": 0.27944204702132613, "grad_norm": 2.0922765731811523, "learning_rate": 0.00016910386951588845, "loss": 1.7502, "step": 7803 }, { "epoch": 0.2794778591508944, "grad_norm": 1.5022773742675781, "learning_rate": 0.0001690954850784461, "loss": 1.9076, "step": 7804 }, { "epoch": 0.27951367128046267, "grad_norm": 1.3663434982299805, "learning_rate": 0.00016908709971141312, "loss": 1.5557, "step": 7805 }, { "epoch": 0.279549483410031, "grad_norm": 2.1710376739501953, "learning_rate": 0.00016907871341490235, "loss": 1.6564, "step": 7806 }, { "epoch": 0.27958529553959927, "grad_norm": 2.2670748233795166, "learning_rate": 0.00016907032618902661, "loss": 1.3921, "step": 7807 }, { "epoch": 0.27962110766916753, "grad_norm": 1.7037529945373535, "learning_rate": 0.00016906193803389868, "loss": 1.6198, "step": 7808 }, { "epoch": 0.27965691979873586, "grad_norm": 2.3128836154937744, "learning_rate": 0.00016905354894963147, "loss": 1.8036, "step": 7809 }, { "epoch": 0.27969273192830413, "grad_norm": 1.735877275466919, "learning_rate": 0.00016904515893633785, "loss": 1.5839, "step": 7810 }, { "epoch": 0.2797285440578724, "grad_norm": 1.5727331638336182, "learning_rate": 0.0001690367679941307, "loss": 1.4397, "step": 7811 }, { "epoch": 0.27976435618744067, "grad_norm": 1.5470871925354004, "learning_rate": 0.00016902837612312285, "loss": 1.8064, "step": 7812 }, { "epoch": 0.279800168317009, "grad_norm": 1.5642430782318115, "learning_rate": 0.00016901998332342726, "loss": 1.6186, "step": 7813 }, { "epoch": 0.27983598044657726, "grad_norm": 1.3714781999588013, "learning_rate": 0.00016901158959515682, "loss": 1.5868, "step": 7814 }, { "epoch": 0.27987179257614553, "grad_norm": 1.6559131145477295, "learning_rate": 0.00016900319493842446, "loss": 1.3897, "step": 7815 }, { "epoch": 0.27990760470571385, "grad_norm": 2.2328150272369385, "learning_rate": 0.00016899479935334307, "loss": 2.1561, "step": 7816 }, { "epoch": 0.2799434168352821, "grad_norm": 1.605811357498169, "learning_rate": 0.0001689864028400257, "loss": 1.535, "step": 7817 }, { "epoch": 0.2799792289648504, "grad_norm": 1.7465509176254272, "learning_rate": 0.00016897800539858527, "loss": 1.5555, "step": 7818 }, { "epoch": 0.28001504109441866, "grad_norm": 1.5020278692245483, "learning_rate": 0.00016896960702913476, "loss": 1.6375, "step": 7819 }, { "epoch": 0.280050853223987, "grad_norm": 1.255519151687622, "learning_rate": 0.00016896120773178712, "loss": 1.7075, "step": 7820 }, { "epoch": 0.28008666535355525, "grad_norm": 1.8907737731933594, "learning_rate": 0.00016895280750665542, "loss": 1.7681, "step": 7821 }, { "epoch": 0.2801224774831235, "grad_norm": 1.6590030193328857, "learning_rate": 0.0001689444063538526, "loss": 1.5601, "step": 7822 }, { "epoch": 0.2801582896126918, "grad_norm": 2.1344289779663086, "learning_rate": 0.00016893600427349173, "loss": 1.3434, "step": 7823 }, { "epoch": 0.2801941017422601, "grad_norm": 1.558802843093872, "learning_rate": 0.00016892760126568584, "loss": 1.4994, "step": 7824 }, { "epoch": 0.2802299138718284, "grad_norm": 1.822415828704834, "learning_rate": 0.00016891919733054802, "loss": 1.5621, "step": 7825 }, { "epoch": 0.28026572600139665, "grad_norm": 1.2544628381729126, "learning_rate": 0.00016891079246819128, "loss": 1.4667, "step": 7826 }, { "epoch": 0.280301538130965, "grad_norm": 1.3893107175827026, "learning_rate": 0.0001689023866787287, "loss": 1.6372, "step": 7827 }, { "epoch": 0.28033735026053325, "grad_norm": 1.3773599863052368, "learning_rate": 0.00016889397996227342, "loss": 1.5978, "step": 7828 }, { "epoch": 0.2803731623901015, "grad_norm": 1.4866869449615479, "learning_rate": 0.00016888557231893846, "loss": 1.5474, "step": 7829 }, { "epoch": 0.2804089745196698, "grad_norm": 1.25416100025177, "learning_rate": 0.00016887716374883703, "loss": 1.5429, "step": 7830 }, { "epoch": 0.2804447866492381, "grad_norm": 1.3411985635757446, "learning_rate": 0.0001688687542520822, "loss": 1.601, "step": 7831 }, { "epoch": 0.2804805987788064, "grad_norm": 1.2994372844696045, "learning_rate": 0.0001688603438287871, "loss": 1.4822, "step": 7832 }, { "epoch": 0.28051641090837465, "grad_norm": 2.017587900161743, "learning_rate": 0.00016885193247906488, "loss": 1.3413, "step": 7833 }, { "epoch": 0.28055222303794297, "grad_norm": 1.8447659015655518, "learning_rate": 0.00016884352020302875, "loss": 1.7149, "step": 7834 }, { "epoch": 0.28058803516751124, "grad_norm": 1.4341516494750977, "learning_rate": 0.00016883510700079182, "loss": 1.5375, "step": 7835 }, { "epoch": 0.2806238472970795, "grad_norm": 1.4541009664535522, "learning_rate": 0.00016882669287246734, "loss": 1.5869, "step": 7836 }, { "epoch": 0.2806596594266478, "grad_norm": 1.8895456790924072, "learning_rate": 0.0001688182778181685, "loss": 1.647, "step": 7837 }, { "epoch": 0.2806954715562161, "grad_norm": 2.3410048484802246, "learning_rate": 0.0001688098618380085, "loss": 1.3165, "step": 7838 }, { "epoch": 0.28073128368578437, "grad_norm": 1.4924992322921753, "learning_rate": 0.00016880144493210052, "loss": 1.551, "step": 7839 }, { "epoch": 0.28076709581535264, "grad_norm": 1.6559271812438965, "learning_rate": 0.00016879302710055792, "loss": 1.5487, "step": 7840 }, { "epoch": 0.28080290794492097, "grad_norm": 1.4658722877502441, "learning_rate": 0.0001687846083434938, "loss": 1.7468, "step": 7841 }, { "epoch": 0.28083872007448923, "grad_norm": 1.6023316383361816, "learning_rate": 0.00016877618866102155, "loss": 1.6474, "step": 7842 }, { "epoch": 0.2808745322040575, "grad_norm": 2.7606184482574463, "learning_rate": 0.0001687677680532544, "loss": 1.3463, "step": 7843 }, { "epoch": 0.28091034433362577, "grad_norm": 2.0281307697296143, "learning_rate": 0.00016875934652030563, "loss": 1.5334, "step": 7844 }, { "epoch": 0.2809461564631941, "grad_norm": 1.7015283107757568, "learning_rate": 0.00016875092406228853, "loss": 1.4474, "step": 7845 }, { "epoch": 0.28098196859276237, "grad_norm": 2.321729898452759, "learning_rate": 0.00016874250067931644, "loss": 1.7248, "step": 7846 }, { "epoch": 0.28101778072233063, "grad_norm": 1.3899301290512085, "learning_rate": 0.00016873407637150268, "loss": 1.5622, "step": 7847 }, { "epoch": 0.28105359285189896, "grad_norm": 2.0998470783233643, "learning_rate": 0.00016872565113896056, "loss": 1.318, "step": 7848 }, { "epoch": 0.28108940498146723, "grad_norm": 1.7129113674163818, "learning_rate": 0.00016871722498180346, "loss": 1.6792, "step": 7849 }, { "epoch": 0.2811252171110355, "grad_norm": 1.6951273679733276, "learning_rate": 0.00016870879790014474, "loss": 1.506, "step": 7850 }, { "epoch": 0.28116102924060377, "grad_norm": 2.1859934329986572, "learning_rate": 0.00016870036989409778, "loss": 1.6351, "step": 7851 }, { "epoch": 0.2811968413701721, "grad_norm": 2.049006938934326, "learning_rate": 0.00016869194096377597, "loss": 1.5945, "step": 7852 }, { "epoch": 0.28123265349974036, "grad_norm": 1.6370213031768799, "learning_rate": 0.00016868351110929268, "loss": 1.6024, "step": 7853 }, { "epoch": 0.28126846562930863, "grad_norm": 1.693914532661438, "learning_rate": 0.00016867508033076135, "loss": 1.2703, "step": 7854 }, { "epoch": 0.28130427775887695, "grad_norm": 1.6464945077896118, "learning_rate": 0.00016866664862829543, "loss": 1.5552, "step": 7855 }, { "epoch": 0.2813400898884452, "grad_norm": 1.5109620094299316, "learning_rate": 0.00016865821600200827, "loss": 1.7248, "step": 7856 }, { "epoch": 0.2813759020180135, "grad_norm": 1.7957203388214111, "learning_rate": 0.0001686497824520134, "loss": 1.8003, "step": 7857 }, { "epoch": 0.28141171414758176, "grad_norm": 1.7840375900268555, "learning_rate": 0.00016864134797842426, "loss": 1.4401, "step": 7858 }, { "epoch": 0.2814475262771501, "grad_norm": 1.9683198928833008, "learning_rate": 0.00016863291258135434, "loss": 1.4671, "step": 7859 }, { "epoch": 0.28148333840671835, "grad_norm": 1.9993942975997925, "learning_rate": 0.00016862447626091707, "loss": 1.7088, "step": 7860 }, { "epoch": 0.2815191505362866, "grad_norm": 1.76691472530365, "learning_rate": 0.00016861603901722601, "loss": 1.3172, "step": 7861 }, { "epoch": 0.28155496266585495, "grad_norm": 1.4511359930038452, "learning_rate": 0.00016860760085039467, "loss": 1.5278, "step": 7862 }, { "epoch": 0.2815907747954232, "grad_norm": 1.7963536977767944, "learning_rate": 0.00016859916176053657, "loss": 1.6062, "step": 7863 }, { "epoch": 0.2816265869249915, "grad_norm": 1.900141954421997, "learning_rate": 0.00016859072174776522, "loss": 1.3176, "step": 7864 }, { "epoch": 0.28166239905455975, "grad_norm": 1.7314437627792358, "learning_rate": 0.00016858228081219416, "loss": 1.5749, "step": 7865 }, { "epoch": 0.2816982111841281, "grad_norm": 2.220306396484375, "learning_rate": 0.000168573838953937, "loss": 1.5796, "step": 7866 }, { "epoch": 0.28173402331369635, "grad_norm": 1.7966647148132324, "learning_rate": 0.00016856539617310728, "loss": 1.5018, "step": 7867 }, { "epoch": 0.2817698354432646, "grad_norm": 1.964325189590454, "learning_rate": 0.0001685569524698186, "loss": 1.7829, "step": 7868 }, { "epoch": 0.28180564757283294, "grad_norm": 1.6538739204406738, "learning_rate": 0.00016854850784418457, "loss": 1.8246, "step": 7869 }, { "epoch": 0.2818414597024012, "grad_norm": 1.633487343788147, "learning_rate": 0.00016854006229631877, "loss": 1.674, "step": 7870 }, { "epoch": 0.2818772718319695, "grad_norm": 2.0294923782348633, "learning_rate": 0.00016853161582633486, "loss": 1.8599, "step": 7871 }, { "epoch": 0.28191308396153775, "grad_norm": 1.7625004053115845, "learning_rate": 0.00016852316843434645, "loss": 1.2676, "step": 7872 }, { "epoch": 0.28194889609110607, "grad_norm": 1.4743856191635132, "learning_rate": 0.0001685147201204672, "loss": 1.8254, "step": 7873 }, { "epoch": 0.28198470822067434, "grad_norm": 1.7030346393585205, "learning_rate": 0.00016850627088481077, "loss": 1.4622, "step": 7874 }, { "epoch": 0.2820205203502426, "grad_norm": 1.8332545757293701, "learning_rate": 0.0001684978207274908, "loss": 1.6707, "step": 7875 }, { "epoch": 0.28205633247981093, "grad_norm": 1.432371973991394, "learning_rate": 0.00016848936964862106, "loss": 1.4167, "step": 7876 }, { "epoch": 0.2820921446093792, "grad_norm": 1.7265770435333252, "learning_rate": 0.00016848091764831518, "loss": 1.4469, "step": 7877 }, { "epoch": 0.28212795673894747, "grad_norm": 3.417351484298706, "learning_rate": 0.00016847246472668684, "loss": 1.5023, "step": 7878 }, { "epoch": 0.28216376886851574, "grad_norm": 1.2689472436904907, "learning_rate": 0.00016846401088384987, "loss": 1.5067, "step": 7879 }, { "epoch": 0.28219958099808407, "grad_norm": 2.011307954788208, "learning_rate": 0.0001684555561199179, "loss": 1.605, "step": 7880 }, { "epoch": 0.28223539312765233, "grad_norm": 2.8057658672332764, "learning_rate": 0.00016844710043500478, "loss": 1.6136, "step": 7881 }, { "epoch": 0.2822712052572206, "grad_norm": 1.857756495475769, "learning_rate": 0.00016843864382922418, "loss": 1.6513, "step": 7882 }, { "epoch": 0.2823070173867889, "grad_norm": 1.6902923583984375, "learning_rate": 0.0001684301863026899, "loss": 1.29, "step": 7883 }, { "epoch": 0.2823428295163572, "grad_norm": 2.0259835720062256, "learning_rate": 0.00016842172785551572, "loss": 1.7171, "step": 7884 }, { "epoch": 0.28237864164592547, "grad_norm": 1.9104828834533691, "learning_rate": 0.00016841326848781546, "loss": 1.6705, "step": 7885 }, { "epoch": 0.28241445377549373, "grad_norm": 2.1479547023773193, "learning_rate": 0.00016840480819970294, "loss": 1.5749, "step": 7886 }, { "epoch": 0.28245026590506206, "grad_norm": 1.6750024557113647, "learning_rate": 0.00016839634699129197, "loss": 1.4534, "step": 7887 }, { "epoch": 0.28248607803463033, "grad_norm": 1.583740472793579, "learning_rate": 0.00016838788486269634, "loss": 1.3206, "step": 7888 }, { "epoch": 0.2825218901641986, "grad_norm": 1.3861204385757446, "learning_rate": 0.00016837942181402993, "loss": 1.3911, "step": 7889 }, { "epoch": 0.2825577022937669, "grad_norm": 1.6452444791793823, "learning_rate": 0.00016837095784540663, "loss": 1.5608, "step": 7890 }, { "epoch": 0.2825935144233352, "grad_norm": 1.4533544778823853, "learning_rate": 0.0001683624929569403, "loss": 1.7149, "step": 7891 }, { "epoch": 0.28262932655290346, "grad_norm": 1.6614084243774414, "learning_rate": 0.0001683540271487448, "loss": 1.4509, "step": 7892 }, { "epoch": 0.28266513868247173, "grad_norm": 1.9571036100387573, "learning_rate": 0.000168345560420934, "loss": 1.6858, "step": 7893 }, { "epoch": 0.28270095081204005, "grad_norm": 1.823486328125, "learning_rate": 0.00016833709277362186, "loss": 1.788, "step": 7894 }, { "epoch": 0.2827367629416083, "grad_norm": 1.6518898010253906, "learning_rate": 0.0001683286242069223, "loss": 1.4248, "step": 7895 }, { "epoch": 0.2827725750711766, "grad_norm": 1.626325011253357, "learning_rate": 0.00016832015472094923, "loss": 1.203, "step": 7896 }, { "epoch": 0.2828083872007449, "grad_norm": 1.368032693862915, "learning_rate": 0.0001683116843158166, "loss": 1.3924, "step": 7897 }, { "epoch": 0.2828441993303132, "grad_norm": 1.738690972328186, "learning_rate": 0.00016830321299163837, "loss": 1.7386, "step": 7898 }, { "epoch": 0.28288001145988145, "grad_norm": 1.9578567743301392, "learning_rate": 0.0001682947407485285, "loss": 1.4225, "step": 7899 }, { "epoch": 0.2829158235894497, "grad_norm": 2.169372320175171, "learning_rate": 0.00016828626758660104, "loss": 1.7834, "step": 7900 }, { "epoch": 0.28295163571901805, "grad_norm": 2.070221185684204, "learning_rate": 0.00016827779350596988, "loss": 1.6165, "step": 7901 }, { "epoch": 0.2829874478485863, "grad_norm": 1.5898553133010864, "learning_rate": 0.00016826931850674913, "loss": 1.7263, "step": 7902 }, { "epoch": 0.2830232599781546, "grad_norm": 1.5495221614837646, "learning_rate": 0.0001682608425890527, "loss": 1.4515, "step": 7903 }, { "epoch": 0.2830590721077229, "grad_norm": 1.3812370300292969, "learning_rate": 0.00016825236575299473, "loss": 1.3405, "step": 7904 }, { "epoch": 0.2830948842372912, "grad_norm": 1.676615595817566, "learning_rate": 0.0001682438879986892, "loss": 1.32, "step": 7905 }, { "epoch": 0.28313069636685945, "grad_norm": 1.6965440511703491, "learning_rate": 0.0001682354093262502, "loss": 1.5504, "step": 7906 }, { "epoch": 0.2831665084964277, "grad_norm": 1.978925108909607, "learning_rate": 0.00016822692973579177, "loss": 1.4036, "step": 7907 }, { "epoch": 0.28320232062599604, "grad_norm": 2.1448557376861572, "learning_rate": 0.000168218449227428, "loss": 1.7598, "step": 7908 }, { "epoch": 0.2832381327555643, "grad_norm": 1.9719595909118652, "learning_rate": 0.00016820996780127302, "loss": 1.3712, "step": 7909 }, { "epoch": 0.2832739448851326, "grad_norm": 1.57223379611969, "learning_rate": 0.00016820148545744089, "loss": 1.5271, "step": 7910 }, { "epoch": 0.2833097570147009, "grad_norm": 1.6346989870071411, "learning_rate": 0.00016819300219604572, "loss": 1.6543, "step": 7911 }, { "epoch": 0.28334556914426917, "grad_norm": 1.9346641302108765, "learning_rate": 0.00016818451801720169, "loss": 1.643, "step": 7912 }, { "epoch": 0.28338138127383744, "grad_norm": 1.87030827999115, "learning_rate": 0.00016817603292102292, "loss": 1.7069, "step": 7913 }, { "epoch": 0.2834171934034057, "grad_norm": 1.740334391593933, "learning_rate": 0.00016816754690762356, "loss": 1.7116, "step": 7914 }, { "epoch": 0.28345300553297403, "grad_norm": 2.0844287872314453, "learning_rate": 0.0001681590599771178, "loss": 1.5254, "step": 7915 }, { "epoch": 0.2834888176625423, "grad_norm": 1.4731221199035645, "learning_rate": 0.00016815057212961985, "loss": 1.5977, "step": 7916 }, { "epoch": 0.28352462979211057, "grad_norm": 1.638742208480835, "learning_rate": 0.0001681420833652438, "loss": 1.5354, "step": 7917 }, { "epoch": 0.2835604419216789, "grad_norm": 1.5350745916366577, "learning_rate": 0.00016813359368410394, "loss": 1.6585, "step": 7918 }, { "epoch": 0.28359625405124717, "grad_norm": 1.2498204708099365, "learning_rate": 0.00016812510308631445, "loss": 1.312, "step": 7919 }, { "epoch": 0.28363206618081543, "grad_norm": 1.752829670906067, "learning_rate": 0.00016811661157198956, "loss": 1.3958, "step": 7920 }, { "epoch": 0.2836678783103837, "grad_norm": 1.6889894008636475, "learning_rate": 0.00016810811914124354, "loss": 1.5042, "step": 7921 }, { "epoch": 0.283703690439952, "grad_norm": 2.0191566944122314, "learning_rate": 0.00016809962579419064, "loss": 1.523, "step": 7922 }, { "epoch": 0.2837395025695203, "grad_norm": 2.0825445652008057, "learning_rate": 0.0001680911315309451, "loss": 1.5253, "step": 7923 }, { "epoch": 0.28377531469908857, "grad_norm": 1.8054052591323853, "learning_rate": 0.00016808263635162123, "loss": 1.7837, "step": 7924 }, { "epoch": 0.2838111268286569, "grad_norm": 1.8791615962982178, "learning_rate": 0.0001680741402563333, "loss": 1.2332, "step": 7925 }, { "epoch": 0.28384693895822516, "grad_norm": 1.7158640623092651, "learning_rate": 0.00016806564324519565, "loss": 1.5696, "step": 7926 }, { "epoch": 0.28388275108779343, "grad_norm": 1.4893388748168945, "learning_rate": 0.00016805714531832253, "loss": 1.5782, "step": 7927 }, { "epoch": 0.2839185632173617, "grad_norm": 2.08494234085083, "learning_rate": 0.00016804864647582832, "loss": 1.8085, "step": 7928 }, { "epoch": 0.28395437534693, "grad_norm": 1.590214729309082, "learning_rate": 0.00016804014671782736, "loss": 1.7791, "step": 7929 }, { "epoch": 0.2839901874764983, "grad_norm": 1.7456499338150024, "learning_rate": 0.00016803164604443395, "loss": 1.2424, "step": 7930 }, { "epoch": 0.28402599960606656, "grad_norm": 2.2353994846343994, "learning_rate": 0.00016802314445576254, "loss": 1.2298, "step": 7931 }, { "epoch": 0.2840618117356349, "grad_norm": 1.718406319618225, "learning_rate": 0.00016801464195192746, "loss": 1.6377, "step": 7932 }, { "epoch": 0.28409762386520315, "grad_norm": 1.8352092504501343, "learning_rate": 0.00016800613853304311, "loss": 1.3559, "step": 7933 }, { "epoch": 0.2841334359947714, "grad_norm": 1.6804618835449219, "learning_rate": 0.00016799763419922387, "loss": 1.8663, "step": 7934 }, { "epoch": 0.2841692481243397, "grad_norm": 1.5867995023727417, "learning_rate": 0.00016798912895058416, "loss": 1.4769, "step": 7935 }, { "epoch": 0.284205060253908, "grad_norm": 1.8360810279846191, "learning_rate": 0.00016798062278723845, "loss": 1.6387, "step": 7936 }, { "epoch": 0.2842408723834763, "grad_norm": 1.573351263999939, "learning_rate": 0.00016797211570930115, "loss": 1.6323, "step": 7937 }, { "epoch": 0.28427668451304455, "grad_norm": 1.9696694612503052, "learning_rate": 0.0001679636077168867, "loss": 1.8065, "step": 7938 }, { "epoch": 0.2843124966426129, "grad_norm": 1.245781421661377, "learning_rate": 0.00016795509881010955, "loss": 1.4907, "step": 7939 }, { "epoch": 0.28434830877218115, "grad_norm": 1.5631426572799683, "learning_rate": 0.00016794658898908424, "loss": 1.5878, "step": 7940 }, { "epoch": 0.2843841209017494, "grad_norm": 2.6311140060424805, "learning_rate": 0.00016793807825392517, "loss": 1.6975, "step": 7941 }, { "epoch": 0.2844199330313177, "grad_norm": 1.682794213294983, "learning_rate": 0.00016792956660474694, "loss": 1.7496, "step": 7942 }, { "epoch": 0.284455745160886, "grad_norm": 1.3867418766021729, "learning_rate": 0.00016792105404166404, "loss": 1.6555, "step": 7943 }, { "epoch": 0.2844915572904543, "grad_norm": 2.540458917617798, "learning_rate": 0.00016791254056479092, "loss": 1.2869, "step": 7944 }, { "epoch": 0.28452736942002255, "grad_norm": 1.879603624343872, "learning_rate": 0.00016790402617424216, "loss": 1.645, "step": 7945 }, { "epoch": 0.28456318154959087, "grad_norm": 1.6672354936599731, "learning_rate": 0.00016789551087013232, "loss": 1.6987, "step": 7946 }, { "epoch": 0.28459899367915914, "grad_norm": 1.5219227075576782, "learning_rate": 0.00016788699465257597, "loss": 1.6638, "step": 7947 }, { "epoch": 0.2846348058087274, "grad_norm": 1.9220548868179321, "learning_rate": 0.00016787847752168769, "loss": 1.3491, "step": 7948 }, { "epoch": 0.2846706179382957, "grad_norm": 1.5979007482528687, "learning_rate": 0.00016786995947758204, "loss": 1.5428, "step": 7949 }, { "epoch": 0.284706430067864, "grad_norm": 1.6197651624679565, "learning_rate": 0.00016786144052037365, "loss": 1.416, "step": 7950 }, { "epoch": 0.28474224219743227, "grad_norm": 1.597205400466919, "learning_rate": 0.00016785292065017707, "loss": 1.3962, "step": 7951 }, { "epoch": 0.28477805432700054, "grad_norm": 2.123845100402832, "learning_rate": 0.000167844399867107, "loss": 1.3666, "step": 7952 }, { "epoch": 0.28481386645656886, "grad_norm": 1.9097387790679932, "learning_rate": 0.00016783587817127804, "loss": 1.3416, "step": 7953 }, { "epoch": 0.28484967858613713, "grad_norm": 1.6967304944992065, "learning_rate": 0.00016782735556280484, "loss": 1.6285, "step": 7954 }, { "epoch": 0.2848854907157054, "grad_norm": 1.3434966802597046, "learning_rate": 0.00016781883204180207, "loss": 1.5585, "step": 7955 }, { "epoch": 0.28492130284527367, "grad_norm": 2.0665807723999023, "learning_rate": 0.00016781030760838436, "loss": 1.9691, "step": 7956 }, { "epoch": 0.284957114974842, "grad_norm": 1.8598235845565796, "learning_rate": 0.00016780178226266646, "loss": 1.6914, "step": 7957 }, { "epoch": 0.28499292710441027, "grad_norm": 1.6443570852279663, "learning_rate": 0.00016779325600476303, "loss": 1.7151, "step": 7958 }, { "epoch": 0.28502873923397853, "grad_norm": 2.839176893234253, "learning_rate": 0.00016778472883478878, "loss": 1.4785, "step": 7959 }, { "epoch": 0.28506455136354686, "grad_norm": 1.6859476566314697, "learning_rate": 0.00016777620075285847, "loss": 1.3766, "step": 7960 }, { "epoch": 0.2851003634931151, "grad_norm": 2.1881790161132812, "learning_rate": 0.00016776767175908676, "loss": 1.8795, "step": 7961 }, { "epoch": 0.2851361756226834, "grad_norm": 1.856256127357483, "learning_rate": 0.00016775914185358846, "loss": 1.7113, "step": 7962 }, { "epoch": 0.28517198775225167, "grad_norm": 2.153108596801758, "learning_rate": 0.00016775061103647834, "loss": 1.6182, "step": 7963 }, { "epoch": 0.28520779988182, "grad_norm": 1.3774833679199219, "learning_rate": 0.00016774207930787108, "loss": 1.5698, "step": 7964 }, { "epoch": 0.28524361201138826, "grad_norm": 2.905151844024658, "learning_rate": 0.00016773354666788155, "loss": 1.2458, "step": 7965 }, { "epoch": 0.28527942414095653, "grad_norm": 1.9551284313201904, "learning_rate": 0.00016772501311662454, "loss": 1.6533, "step": 7966 }, { "epoch": 0.28531523627052485, "grad_norm": 1.6111496686935425, "learning_rate": 0.00016771647865421483, "loss": 1.5987, "step": 7967 }, { "epoch": 0.2853510484000931, "grad_norm": 1.438314437866211, "learning_rate": 0.00016770794328076726, "loss": 1.626, "step": 7968 }, { "epoch": 0.2853868605296614, "grad_norm": 1.7360272407531738, "learning_rate": 0.00016769940699639662, "loss": 1.5362, "step": 7969 }, { "epoch": 0.28542267265922966, "grad_norm": 1.3850613832473755, "learning_rate": 0.0001676908698012178, "loss": 1.7958, "step": 7970 }, { "epoch": 0.285458484788798, "grad_norm": 1.258893370628357, "learning_rate": 0.0001676823316953456, "loss": 1.0779, "step": 7971 }, { "epoch": 0.28549429691836625, "grad_norm": 1.8675183057785034, "learning_rate": 0.00016767379267889498, "loss": 1.5385, "step": 7972 }, { "epoch": 0.2855301090479345, "grad_norm": 1.3767646551132202, "learning_rate": 0.00016766525275198078, "loss": 1.6422, "step": 7973 }, { "epoch": 0.28556592117750285, "grad_norm": 1.6324293613433838, "learning_rate": 0.00016765671191471785, "loss": 1.5077, "step": 7974 }, { "epoch": 0.2856017333070711, "grad_norm": 1.361929178237915, "learning_rate": 0.00016764817016722114, "loss": 1.6237, "step": 7975 }, { "epoch": 0.2856375454366394, "grad_norm": 1.4396480321884155, "learning_rate": 0.00016763962750960558, "loss": 1.6242, "step": 7976 }, { "epoch": 0.28567335756620765, "grad_norm": 1.8851977586746216, "learning_rate": 0.00016763108394198605, "loss": 1.7705, "step": 7977 }, { "epoch": 0.285709169695776, "grad_norm": 1.8253602981567383, "learning_rate": 0.00016762253946447757, "loss": 1.6693, "step": 7978 }, { "epoch": 0.28574498182534425, "grad_norm": 1.959054946899414, "learning_rate": 0.000167613994077195, "loss": 1.499, "step": 7979 }, { "epoch": 0.2857807939549125, "grad_norm": 1.7831599712371826, "learning_rate": 0.00016760544778025337, "loss": 1.8024, "step": 7980 }, { "epoch": 0.28581660608448084, "grad_norm": 2.5011425018310547, "learning_rate": 0.00016759690057376769, "loss": 1.7597, "step": 7981 }, { "epoch": 0.2858524182140491, "grad_norm": 1.647387146949768, "learning_rate": 0.00016758835245785284, "loss": 1.6501, "step": 7982 }, { "epoch": 0.2858882303436174, "grad_norm": 1.6591894626617432, "learning_rate": 0.00016757980343262393, "loss": 1.8044, "step": 7983 }, { "epoch": 0.28592404247318565, "grad_norm": 1.6702752113342285, "learning_rate": 0.00016757125349819592, "loss": 1.8583, "step": 7984 }, { "epoch": 0.28595985460275397, "grad_norm": 1.6072109937667847, "learning_rate": 0.00016756270265468385, "loss": 1.392, "step": 7985 }, { "epoch": 0.28599566673232224, "grad_norm": 1.9452677965164185, "learning_rate": 0.00016755415090220278, "loss": 1.5837, "step": 7986 }, { "epoch": 0.2860314788618905, "grad_norm": 1.7112654447555542, "learning_rate": 0.00016754559824086774, "loss": 1.3601, "step": 7987 }, { "epoch": 0.28606729099145883, "grad_norm": 1.639289140701294, "learning_rate": 0.00016753704467079383, "loss": 1.7134, "step": 7988 }, { "epoch": 0.2861031031210271, "grad_norm": 1.4900970458984375, "learning_rate": 0.00016752849019209607, "loss": 1.4638, "step": 7989 }, { "epoch": 0.28613891525059537, "grad_norm": 1.5364689826965332, "learning_rate": 0.00016751993480488956, "loss": 1.6713, "step": 7990 }, { "epoch": 0.28617472738016364, "grad_norm": 1.6079754829406738, "learning_rate": 0.0001675113785092895, "loss": 1.8136, "step": 7991 }, { "epoch": 0.28621053950973196, "grad_norm": 1.4302982091903687, "learning_rate": 0.00016750282130541084, "loss": 1.4623, "step": 7992 }, { "epoch": 0.28624635163930023, "grad_norm": 1.5606993436813354, "learning_rate": 0.00016749426319336884, "loss": 1.5872, "step": 7993 }, { "epoch": 0.2862821637688685, "grad_norm": 1.3386200666427612, "learning_rate": 0.00016748570417327857, "loss": 1.5573, "step": 7994 }, { "epoch": 0.2863179758984368, "grad_norm": 2.041780710220337, "learning_rate": 0.0001674771442452552, "loss": 1.6269, "step": 7995 }, { "epoch": 0.2863537880280051, "grad_norm": 1.6531713008880615, "learning_rate": 0.0001674685834094139, "loss": 1.5946, "step": 7996 }, { "epoch": 0.28638960015757337, "grad_norm": 1.9872679710388184, "learning_rate": 0.00016746002166586984, "loss": 1.5185, "step": 7997 }, { "epoch": 0.28642541228714163, "grad_norm": 1.2475894689559937, "learning_rate": 0.00016745145901473819, "loss": 1.3904, "step": 7998 }, { "epoch": 0.28646122441670996, "grad_norm": 1.5508625507354736, "learning_rate": 0.0001674428954561342, "loss": 1.4844, "step": 7999 }, { "epoch": 0.2864970365462782, "grad_norm": 2.021813154220581, "learning_rate": 0.000167434330990173, "loss": 1.6274, "step": 8000 }, { "epoch": 0.2865328486758465, "grad_norm": 1.5985814332962036, "learning_rate": 0.0001674257656169699, "loss": 1.837, "step": 8001 }, { "epoch": 0.2865686608054148, "grad_norm": 1.310536503791809, "learning_rate": 0.00016741719933664008, "loss": 1.5341, "step": 8002 }, { "epoch": 0.2866044729349831, "grad_norm": 2.1644248962402344, "learning_rate": 0.00016740863214929883, "loss": 1.4358, "step": 8003 }, { "epoch": 0.28664028506455136, "grad_norm": 1.359682559967041, "learning_rate": 0.00016740006405506133, "loss": 1.4231, "step": 8004 }, { "epoch": 0.28667609719411963, "grad_norm": 1.2971854209899902, "learning_rate": 0.00016739149505404298, "loss": 1.2328, "step": 8005 }, { "epoch": 0.28671190932368795, "grad_norm": 1.5941914319992065, "learning_rate": 0.00016738292514635893, "loss": 1.3761, "step": 8006 }, { "epoch": 0.2867477214532562, "grad_norm": 1.3879280090332031, "learning_rate": 0.0001673743543321246, "loss": 1.5472, "step": 8007 }, { "epoch": 0.2867835335828245, "grad_norm": 2.025636911392212, "learning_rate": 0.00016736578261145518, "loss": 1.7866, "step": 8008 }, { "epoch": 0.2868193457123928, "grad_norm": 1.9905762672424316, "learning_rate": 0.00016735720998446607, "loss": 1.451, "step": 8009 }, { "epoch": 0.2868551578419611, "grad_norm": 2.0516388416290283, "learning_rate": 0.0001673486364512726, "loss": 1.5917, "step": 8010 }, { "epoch": 0.28689096997152935, "grad_norm": 1.2939916849136353, "learning_rate": 0.00016734006201199006, "loss": 1.2998, "step": 8011 }, { "epoch": 0.2869267821010976, "grad_norm": 1.6238223314285278, "learning_rate": 0.00016733148666673388, "loss": 1.3898, "step": 8012 }, { "epoch": 0.28696259423066595, "grad_norm": 2.0216140747070312, "learning_rate": 0.0001673229104156194, "loss": 1.5983, "step": 8013 }, { "epoch": 0.2869984063602342, "grad_norm": 1.5885149240493774, "learning_rate": 0.000167314333258762, "loss": 1.5381, "step": 8014 }, { "epoch": 0.2870342184898025, "grad_norm": 2.034376382827759, "learning_rate": 0.00016730575519627707, "loss": 1.442, "step": 8015 }, { "epoch": 0.2870700306193708, "grad_norm": 1.7644431591033936, "learning_rate": 0.00016729717622828002, "loss": 1.4961, "step": 8016 }, { "epoch": 0.2871058427489391, "grad_norm": 1.5184577703475952, "learning_rate": 0.00016728859635488626, "loss": 1.536, "step": 8017 }, { "epoch": 0.28714165487850735, "grad_norm": 1.647737741470337, "learning_rate": 0.00016728001557621126, "loss": 1.5551, "step": 8018 }, { "epoch": 0.2871774670080756, "grad_norm": 1.5250535011291504, "learning_rate": 0.00016727143389237042, "loss": 1.4027, "step": 8019 }, { "epoch": 0.28721327913764394, "grad_norm": 1.6826478242874146, "learning_rate": 0.0001672628513034792, "loss": 1.6263, "step": 8020 }, { "epoch": 0.2872490912672122, "grad_norm": 1.4399031400680542, "learning_rate": 0.0001672542678096531, "loss": 1.6192, "step": 8021 }, { "epoch": 0.2872849033967805, "grad_norm": 1.9906154870986938, "learning_rate": 0.00016724568341100758, "loss": 1.4139, "step": 8022 }, { "epoch": 0.28732071552634875, "grad_norm": 1.7506561279296875, "learning_rate": 0.0001672370981076581, "loss": 1.4831, "step": 8023 }, { "epoch": 0.28735652765591707, "grad_norm": 1.703379511833191, "learning_rate": 0.00016722851189972024, "loss": 1.5373, "step": 8024 }, { "epoch": 0.28739233978548534, "grad_norm": 1.5690869092941284, "learning_rate": 0.00016721992478730942, "loss": 1.2874, "step": 8025 }, { "epoch": 0.2874281519150536, "grad_norm": 1.3549939393997192, "learning_rate": 0.00016721133677054123, "loss": 1.3596, "step": 8026 }, { "epoch": 0.28746396404462193, "grad_norm": 1.2308684587478638, "learning_rate": 0.00016720274784953122, "loss": 1.1399, "step": 8027 }, { "epoch": 0.2874997761741902, "grad_norm": 1.5450139045715332, "learning_rate": 0.00016719415802439493, "loss": 1.5061, "step": 8028 }, { "epoch": 0.28753558830375847, "grad_norm": 1.6292673349380493, "learning_rate": 0.0001671855672952479, "loss": 1.4167, "step": 8029 }, { "epoch": 0.28757140043332674, "grad_norm": 2.0085721015930176, "learning_rate": 0.00016717697566220573, "loss": 1.6847, "step": 8030 }, { "epoch": 0.28760721256289506, "grad_norm": 1.688120722770691, "learning_rate": 0.00016716838312538402, "loss": 1.5717, "step": 8031 }, { "epoch": 0.28764302469246333, "grad_norm": 1.3048220872879028, "learning_rate": 0.00016715978968489834, "loss": 1.5839, "step": 8032 }, { "epoch": 0.2876788368220316, "grad_norm": 1.4469718933105469, "learning_rate": 0.0001671511953408643, "loss": 1.5844, "step": 8033 }, { "epoch": 0.2877146489515999, "grad_norm": 1.7788381576538086, "learning_rate": 0.0001671426000933976, "loss": 1.5919, "step": 8034 }, { "epoch": 0.2877504610811682, "grad_norm": 1.2445861101150513, "learning_rate": 0.00016713400394261378, "loss": 1.5703, "step": 8035 }, { "epoch": 0.28778627321073647, "grad_norm": 1.5097942352294922, "learning_rate": 0.00016712540688862854, "loss": 1.7577, "step": 8036 }, { "epoch": 0.28782208534030473, "grad_norm": 1.4741946458816528, "learning_rate": 0.0001671168089315575, "loss": 1.7221, "step": 8037 }, { "epoch": 0.28785789746987306, "grad_norm": 2.001183271408081, "learning_rate": 0.00016710821007151646, "loss": 1.6299, "step": 8038 }, { "epoch": 0.2878937095994413, "grad_norm": 2.835008144378662, "learning_rate": 0.00016709961030862092, "loss": 1.3872, "step": 8039 }, { "epoch": 0.2879295217290096, "grad_norm": 2.880967140197754, "learning_rate": 0.00016709100964298673, "loss": 1.097, "step": 8040 }, { "epoch": 0.2879653338585779, "grad_norm": 1.6155633926391602, "learning_rate": 0.00016708240807472956, "loss": 1.6969, "step": 8041 }, { "epoch": 0.2880011459881462, "grad_norm": 1.329376459121704, "learning_rate": 0.00016707380560396508, "loss": 1.7448, "step": 8042 }, { "epoch": 0.28803695811771446, "grad_norm": 2.0664658546447754, "learning_rate": 0.0001670652022308091, "loss": 1.4754, "step": 8043 }, { "epoch": 0.28807277024728273, "grad_norm": 1.5695208311080933, "learning_rate": 0.0001670565979553773, "loss": 1.6802, "step": 8044 }, { "epoch": 0.28810858237685105, "grad_norm": 1.879258155822754, "learning_rate": 0.0001670479927777855, "loss": 1.3501, "step": 8045 }, { "epoch": 0.2881443945064193, "grad_norm": 2.0454368591308594, "learning_rate": 0.0001670393866981494, "loss": 1.6627, "step": 8046 }, { "epoch": 0.2881802066359876, "grad_norm": 1.3214797973632812, "learning_rate": 0.00016703077971658487, "loss": 1.484, "step": 8047 }, { "epoch": 0.2882160187655559, "grad_norm": 1.7491061687469482, "learning_rate": 0.00016702217183320762, "loss": 1.7092, "step": 8048 }, { "epoch": 0.2882518308951242, "grad_norm": 1.3021421432495117, "learning_rate": 0.00016701356304813357, "loss": 1.4552, "step": 8049 }, { "epoch": 0.28828764302469245, "grad_norm": 1.3362562656402588, "learning_rate": 0.00016700495336147841, "loss": 1.4673, "step": 8050 }, { "epoch": 0.2883234551542607, "grad_norm": 1.692622184753418, "learning_rate": 0.00016699634277335805, "loss": 1.4147, "step": 8051 }, { "epoch": 0.28835926728382905, "grad_norm": 2.1186437606811523, "learning_rate": 0.00016698773128388832, "loss": 1.7805, "step": 8052 }, { "epoch": 0.2883950794133973, "grad_norm": 1.411199688911438, "learning_rate": 0.00016697911889318508, "loss": 1.4397, "step": 8053 }, { "epoch": 0.2884308915429656, "grad_norm": 1.3372883796691895, "learning_rate": 0.00016697050560136417, "loss": 1.3642, "step": 8054 }, { "epoch": 0.2884667036725339, "grad_norm": 1.3349575996398926, "learning_rate": 0.0001669618914085415, "loss": 1.6066, "step": 8055 }, { "epoch": 0.2885025158021022, "grad_norm": 1.5699543952941895, "learning_rate": 0.00016695327631483298, "loss": 1.9668, "step": 8056 }, { "epoch": 0.28853832793167045, "grad_norm": 1.399040699005127, "learning_rate": 0.00016694466032035447, "loss": 1.5224, "step": 8057 }, { "epoch": 0.2885741400612387, "grad_norm": 1.3568496704101562, "learning_rate": 0.0001669360434252219, "loss": 1.5744, "step": 8058 }, { "epoch": 0.28860995219080704, "grad_norm": 3.9023430347442627, "learning_rate": 0.00016692742562955123, "loss": 1.4893, "step": 8059 }, { "epoch": 0.2886457643203753, "grad_norm": 1.27892005443573, "learning_rate": 0.00016691880693345837, "loss": 1.5439, "step": 8060 }, { "epoch": 0.2886815764499436, "grad_norm": 1.1989108324050903, "learning_rate": 0.00016691018733705926, "loss": 1.4919, "step": 8061 }, { "epoch": 0.2887173885795119, "grad_norm": 1.627945899963379, "learning_rate": 0.00016690156684046991, "loss": 1.5184, "step": 8062 }, { "epoch": 0.28875320070908017, "grad_norm": 1.6738954782485962, "learning_rate": 0.00016689294544380628, "loss": 1.6135, "step": 8063 }, { "epoch": 0.28878901283864844, "grad_norm": 1.9493415355682373, "learning_rate": 0.00016688432314718434, "loss": 1.5187, "step": 8064 }, { "epoch": 0.2888248249682167, "grad_norm": 1.8097580671310425, "learning_rate": 0.0001668756999507201, "loss": 1.7367, "step": 8065 }, { "epoch": 0.28886063709778503, "grad_norm": 2.6163599491119385, "learning_rate": 0.00016686707585452962, "loss": 1.8102, "step": 8066 }, { "epoch": 0.2888964492273533, "grad_norm": 1.294272780418396, "learning_rate": 0.00016685845085872883, "loss": 1.2674, "step": 8067 }, { "epoch": 0.28893226135692157, "grad_norm": 1.7067070007324219, "learning_rate": 0.00016684982496343386, "loss": 1.9238, "step": 8068 }, { "epoch": 0.2889680734864899, "grad_norm": 1.9256243705749512, "learning_rate": 0.0001668411981687607, "loss": 1.2723, "step": 8069 }, { "epoch": 0.28900388561605816, "grad_norm": 1.8836833238601685, "learning_rate": 0.00016683257047482548, "loss": 1.9504, "step": 8070 }, { "epoch": 0.28903969774562643, "grad_norm": 1.2621315717697144, "learning_rate": 0.0001668239418817442, "loss": 1.5191, "step": 8071 }, { "epoch": 0.2890755098751947, "grad_norm": 1.624903678894043, "learning_rate": 0.000166815312389633, "loss": 1.3441, "step": 8072 }, { "epoch": 0.289111322004763, "grad_norm": 1.787150502204895, "learning_rate": 0.00016680668199860793, "loss": 1.7283, "step": 8073 }, { "epoch": 0.2891471341343313, "grad_norm": 1.5423107147216797, "learning_rate": 0.00016679805070878514, "loss": 1.5735, "step": 8074 }, { "epoch": 0.28918294626389957, "grad_norm": 1.5319451093673706, "learning_rate": 0.00016678941852028075, "loss": 1.754, "step": 8075 }, { "epoch": 0.2892187583934679, "grad_norm": 1.78415846824646, "learning_rate": 0.0001667807854332109, "loss": 1.6037, "step": 8076 }, { "epoch": 0.28925457052303616, "grad_norm": 1.5288525819778442, "learning_rate": 0.0001667721514476917, "loss": 1.7794, "step": 8077 }, { "epoch": 0.2892903826526044, "grad_norm": 1.83223295211792, "learning_rate": 0.0001667635165638393, "loss": 1.4731, "step": 8078 }, { "epoch": 0.2893261947821727, "grad_norm": 1.4526853561401367, "learning_rate": 0.00016675488078176994, "loss": 1.65, "step": 8079 }, { "epoch": 0.289362006911741, "grad_norm": 1.5872565507888794, "learning_rate": 0.00016674624410159978, "loss": 1.5926, "step": 8080 }, { "epoch": 0.2893978190413093, "grad_norm": 1.7032769918441772, "learning_rate": 0.000166737606523445, "loss": 1.8127, "step": 8081 }, { "epoch": 0.28943363117087756, "grad_norm": 1.5613534450531006, "learning_rate": 0.00016672896804742178, "loss": 1.8724, "step": 8082 }, { "epoch": 0.2894694433004459, "grad_norm": 1.4352047443389893, "learning_rate": 0.00016672032867364638, "loss": 1.6871, "step": 8083 }, { "epoch": 0.28950525543001415, "grad_norm": 1.5885288715362549, "learning_rate": 0.00016671168840223503, "loss": 1.5456, "step": 8084 }, { "epoch": 0.2895410675595824, "grad_norm": 1.3646939992904663, "learning_rate": 0.00016670304723330397, "loss": 1.3528, "step": 8085 }, { "epoch": 0.2895768796891507, "grad_norm": 2.0885651111602783, "learning_rate": 0.00016669440516696945, "loss": 1.3586, "step": 8086 }, { "epoch": 0.289612691818719, "grad_norm": 1.4972714185714722, "learning_rate": 0.0001666857622033477, "loss": 1.6482, "step": 8087 }, { "epoch": 0.2896485039482873, "grad_norm": 1.8946317434310913, "learning_rate": 0.00016667711834255505, "loss": 1.672, "step": 8088 }, { "epoch": 0.28968431607785555, "grad_norm": 1.687410593032837, "learning_rate": 0.0001666684735847078, "loss": 1.1767, "step": 8089 }, { "epoch": 0.2897201282074239, "grad_norm": 1.3974437713623047, "learning_rate": 0.00016665982792992226, "loss": 1.5356, "step": 8090 }, { "epoch": 0.28975594033699215, "grad_norm": 1.5038459300994873, "learning_rate": 0.00016665118137831468, "loss": 1.2653, "step": 8091 }, { "epoch": 0.2897917524665604, "grad_norm": 1.456130862236023, "learning_rate": 0.00016664253393000144, "loss": 1.8139, "step": 8092 }, { "epoch": 0.2898275645961287, "grad_norm": 1.6186647415161133, "learning_rate": 0.00016663388558509887, "loss": 1.5897, "step": 8093 }, { "epoch": 0.289863376725697, "grad_norm": 1.3481247425079346, "learning_rate": 0.00016662523634372334, "loss": 1.4714, "step": 8094 }, { "epoch": 0.2898991888552653, "grad_norm": 1.415844440460205, "learning_rate": 0.00016661658620599113, "loss": 1.6026, "step": 8095 }, { "epoch": 0.28993500098483355, "grad_norm": 2.165412187576294, "learning_rate": 0.00016660793517201875, "loss": 1.6079, "step": 8096 }, { "epoch": 0.28997081311440187, "grad_norm": 1.5296587944030762, "learning_rate": 0.00016659928324192248, "loss": 1.7825, "step": 8097 }, { "epoch": 0.29000662524397014, "grad_norm": 1.6164709329605103, "learning_rate": 0.0001665906304158188, "loss": 1.5188, "step": 8098 }, { "epoch": 0.2900424373735384, "grad_norm": 1.5287069082260132, "learning_rate": 0.00016658197669382405, "loss": 1.579, "step": 8099 }, { "epoch": 0.2900782495031067, "grad_norm": 2.6942615509033203, "learning_rate": 0.0001665733220760547, "loss": 1.3303, "step": 8100 }, { "epoch": 0.290114061632675, "grad_norm": 2.589672327041626, "learning_rate": 0.00016656466656262718, "loss": 1.4543, "step": 8101 }, { "epoch": 0.29014987376224327, "grad_norm": 1.4460633993148804, "learning_rate": 0.00016655601015365794, "loss": 1.6278, "step": 8102 }, { "epoch": 0.29018568589181154, "grad_norm": 1.2138534784317017, "learning_rate": 0.00016654735284926341, "loss": 1.57, "step": 8103 }, { "epoch": 0.29022149802137986, "grad_norm": 1.3130338191986084, "learning_rate": 0.00016653869464956008, "loss": 1.5317, "step": 8104 }, { "epoch": 0.29025731015094813, "grad_norm": 2.141824245452881, "learning_rate": 0.00016653003555466448, "loss": 1.3303, "step": 8105 }, { "epoch": 0.2902931222805164, "grad_norm": 1.5391119718551636, "learning_rate": 0.00016652137556469305, "loss": 1.3035, "step": 8106 }, { "epoch": 0.29032893441008467, "grad_norm": 2.6114346981048584, "learning_rate": 0.00016651271467976232, "loss": 1.6472, "step": 8107 }, { "epoch": 0.290364746539653, "grad_norm": 2.207442283630371, "learning_rate": 0.0001665040528999888, "loss": 1.5477, "step": 8108 }, { "epoch": 0.29040055866922126, "grad_norm": 1.4280242919921875, "learning_rate": 0.00016649539022548903, "loss": 1.5803, "step": 8109 }, { "epoch": 0.29043637079878953, "grad_norm": 1.6368767023086548, "learning_rate": 0.00016648672665637958, "loss": 1.4957, "step": 8110 }, { "epoch": 0.29047218292835786, "grad_norm": 1.8318397998809814, "learning_rate": 0.00016647806219277698, "loss": 1.4815, "step": 8111 }, { "epoch": 0.2905079950579261, "grad_norm": 2.205343723297119, "learning_rate": 0.0001664693968347978, "loss": 1.8598, "step": 8112 }, { "epoch": 0.2905438071874944, "grad_norm": 2.0023486614227295, "learning_rate": 0.00016646073058255862, "loss": 1.3708, "step": 8113 }, { "epoch": 0.29057961931706267, "grad_norm": 1.8614946603775024, "learning_rate": 0.00016645206343617603, "loss": 1.7015, "step": 8114 }, { "epoch": 0.290615431446631, "grad_norm": 1.6110183000564575, "learning_rate": 0.00016644339539576664, "loss": 1.5513, "step": 8115 }, { "epoch": 0.29065124357619926, "grad_norm": 1.5081207752227783, "learning_rate": 0.0001664347264614471, "loss": 1.6817, "step": 8116 }, { "epoch": 0.2906870557057675, "grad_norm": 1.4741617441177368, "learning_rate": 0.000166426056633334, "loss": 1.7121, "step": 8117 }, { "epoch": 0.29072286783533585, "grad_norm": 1.6981256008148193, "learning_rate": 0.00016641738591154396, "loss": 1.626, "step": 8118 }, { "epoch": 0.2907586799649041, "grad_norm": 1.4795587062835693, "learning_rate": 0.00016640871429619372, "loss": 1.5249, "step": 8119 }, { "epoch": 0.2907944920944724, "grad_norm": 1.8192358016967773, "learning_rate": 0.00016640004178739985, "loss": 1.5308, "step": 8120 }, { "epoch": 0.29083030422404066, "grad_norm": 1.5716443061828613, "learning_rate": 0.0001663913683852791, "loss": 1.1341, "step": 8121 }, { "epoch": 0.290866116353609, "grad_norm": 1.5312237739562988, "learning_rate": 0.00016638269408994808, "loss": 1.5524, "step": 8122 }, { "epoch": 0.29090192848317725, "grad_norm": 1.443957805633545, "learning_rate": 0.00016637401890152358, "loss": 1.4105, "step": 8123 }, { "epoch": 0.2909377406127455, "grad_norm": 1.9536917209625244, "learning_rate": 0.00016636534282012225, "loss": 1.4532, "step": 8124 }, { "epoch": 0.29097355274231385, "grad_norm": 2.228641986846924, "learning_rate": 0.00016635666584586083, "loss": 1.5409, "step": 8125 }, { "epoch": 0.2910093648718821, "grad_norm": 2.049088716506958, "learning_rate": 0.00016634798797885607, "loss": 1.6185, "step": 8126 }, { "epoch": 0.2910451770014504, "grad_norm": 1.800985336303711, "learning_rate": 0.00016633930921922474, "loss": 1.6347, "step": 8127 }, { "epoch": 0.29108098913101865, "grad_norm": 1.634249210357666, "learning_rate": 0.00016633062956708354, "loss": 1.4489, "step": 8128 }, { "epoch": 0.291116801260587, "grad_norm": 1.9593605995178223, "learning_rate": 0.0001663219490225493, "loss": 1.4183, "step": 8129 }, { "epoch": 0.29115261339015525, "grad_norm": 1.6916444301605225, "learning_rate": 0.0001663132675857388, "loss": 1.8659, "step": 8130 }, { "epoch": 0.2911884255197235, "grad_norm": 1.6571805477142334, "learning_rate": 0.0001663045852567688, "loss": 1.648, "step": 8131 }, { "epoch": 0.29122423764929184, "grad_norm": 1.6843384504318237, "learning_rate": 0.00016629590203575613, "loss": 1.2479, "step": 8132 }, { "epoch": 0.2912600497788601, "grad_norm": 1.5704199075698853, "learning_rate": 0.0001662872179228176, "loss": 1.6044, "step": 8133 }, { "epoch": 0.2912958619084284, "grad_norm": 2.2352945804595947, "learning_rate": 0.0001662785329180701, "loss": 1.6788, "step": 8134 }, { "epoch": 0.29133167403799665, "grad_norm": 1.5743358135223389, "learning_rate": 0.0001662698470216304, "loss": 1.4598, "step": 8135 }, { "epoch": 0.29136748616756497, "grad_norm": 1.7981449365615845, "learning_rate": 0.0001662611602336154, "loss": 1.5804, "step": 8136 }, { "epoch": 0.29140329829713324, "grad_norm": 1.3479498624801636, "learning_rate": 0.00016625247255414198, "loss": 1.7092, "step": 8137 }, { "epoch": 0.2914391104267015, "grad_norm": 1.1672223806381226, "learning_rate": 0.000166243783983327, "loss": 1.3365, "step": 8138 }, { "epoch": 0.29147492255626983, "grad_norm": 1.7496581077575684, "learning_rate": 0.00016623509452128732, "loss": 1.5851, "step": 8139 }, { "epoch": 0.2915107346858381, "grad_norm": 1.4864665269851685, "learning_rate": 0.00016622640416813988, "loss": 1.586, "step": 8140 }, { "epoch": 0.29154654681540637, "grad_norm": 1.4444739818572998, "learning_rate": 0.00016621771292400162, "loss": 1.5328, "step": 8141 }, { "epoch": 0.29158235894497464, "grad_norm": 1.6213339567184448, "learning_rate": 0.00016620902078898943, "loss": 1.2968, "step": 8142 }, { "epoch": 0.29161817107454296, "grad_norm": 1.5538610219955444, "learning_rate": 0.0001662003277632203, "loss": 1.5661, "step": 8143 }, { "epoch": 0.29165398320411123, "grad_norm": 2.134049654006958, "learning_rate": 0.0001661916338468111, "loss": 1.399, "step": 8144 }, { "epoch": 0.2916897953336795, "grad_norm": 2.0003087520599365, "learning_rate": 0.00016618293903987888, "loss": 1.6424, "step": 8145 }, { "epoch": 0.2917256074632478, "grad_norm": 2.1988790035247803, "learning_rate": 0.00016617424334254061, "loss": 1.5365, "step": 8146 }, { "epoch": 0.2917614195928161, "grad_norm": 1.5732612609863281, "learning_rate": 0.00016616554675491325, "loss": 1.8022, "step": 8147 }, { "epoch": 0.29179723172238436, "grad_norm": 1.4355159997940063, "learning_rate": 0.00016615684927711376, "loss": 1.4396, "step": 8148 }, { "epoch": 0.29183304385195263, "grad_norm": 1.6861153841018677, "learning_rate": 0.00016614815090925923, "loss": 1.5059, "step": 8149 }, { "epoch": 0.29186885598152096, "grad_norm": 1.6933960914611816, "learning_rate": 0.00016613945165146668, "loss": 1.6366, "step": 8150 }, { "epoch": 0.2919046681110892, "grad_norm": 1.724313735961914, "learning_rate": 0.00016613075150385308, "loss": 1.3966, "step": 8151 }, { "epoch": 0.2919404802406575, "grad_norm": 1.6139729022979736, "learning_rate": 0.00016612205046653554, "loss": 1.5069, "step": 8152 }, { "epoch": 0.2919762923702258, "grad_norm": 1.470168113708496, "learning_rate": 0.00016611334853963106, "loss": 1.2152, "step": 8153 }, { "epoch": 0.2920121044997941, "grad_norm": 1.4632880687713623, "learning_rate": 0.0001661046457232568, "loss": 1.2306, "step": 8154 }, { "epoch": 0.29204791662936236, "grad_norm": 1.752076268196106, "learning_rate": 0.00016609594201752982, "loss": 1.6839, "step": 8155 }, { "epoch": 0.2920837287589306, "grad_norm": 1.5564721822738647, "learning_rate": 0.00016608723742256719, "loss": 1.5811, "step": 8156 }, { "epoch": 0.29211954088849895, "grad_norm": 1.4910109043121338, "learning_rate": 0.00016607853193848597, "loss": 1.4991, "step": 8157 }, { "epoch": 0.2921553530180672, "grad_norm": 1.9942272901535034, "learning_rate": 0.0001660698255654034, "loss": 1.6296, "step": 8158 }, { "epoch": 0.2921911651476355, "grad_norm": 2.043497323989868, "learning_rate": 0.0001660611183034365, "loss": 1.7203, "step": 8159 }, { "epoch": 0.2922269772772038, "grad_norm": 1.740036129951477, "learning_rate": 0.00016605241015270247, "loss": 1.5992, "step": 8160 }, { "epoch": 0.2922627894067721, "grad_norm": 1.7537078857421875, "learning_rate": 0.0001660437011133185, "loss": 1.7468, "step": 8161 }, { "epoch": 0.29229860153634035, "grad_norm": 1.6161073446273804, "learning_rate": 0.0001660349911854017, "loss": 1.6065, "step": 8162 }, { "epoch": 0.2923344136659086, "grad_norm": 2.7794902324676514, "learning_rate": 0.0001660262803690693, "loss": 1.356, "step": 8163 }, { "epoch": 0.29237022579547695, "grad_norm": 1.438673496246338, "learning_rate": 0.00016601756866443845, "loss": 1.7374, "step": 8164 }, { "epoch": 0.2924060379250452, "grad_norm": 1.6174455881118774, "learning_rate": 0.00016600885607162636, "loss": 1.2237, "step": 8165 }, { "epoch": 0.2924418500546135, "grad_norm": 1.4131717681884766, "learning_rate": 0.00016600014259075024, "loss": 1.6027, "step": 8166 }, { "epoch": 0.2924776621841818, "grad_norm": 2.0395007133483887, "learning_rate": 0.00016599142822192736, "loss": 1.0541, "step": 8167 }, { "epoch": 0.2925134743137501, "grad_norm": 1.4359726905822754, "learning_rate": 0.00016598271296527494, "loss": 1.7906, "step": 8168 }, { "epoch": 0.29254928644331835, "grad_norm": 1.8879810571670532, "learning_rate": 0.00016597399682091024, "loss": 1.2775, "step": 8169 }, { "epoch": 0.2925850985728866, "grad_norm": 1.5420466661453247, "learning_rate": 0.00016596527978895046, "loss": 1.3398, "step": 8170 }, { "epoch": 0.29262091070245494, "grad_norm": 1.2738362550735474, "learning_rate": 0.00016595656186951297, "loss": 1.7642, "step": 8171 }, { "epoch": 0.2926567228320232, "grad_norm": 1.8173683881759644, "learning_rate": 0.00016594784306271502, "loss": 1.6041, "step": 8172 }, { "epoch": 0.2926925349615915, "grad_norm": 1.720488429069519, "learning_rate": 0.00016593912336867393, "loss": 1.3482, "step": 8173 }, { "epoch": 0.2927283470911598, "grad_norm": 2.0132124423980713, "learning_rate": 0.00016593040278750694, "loss": 1.5073, "step": 8174 }, { "epoch": 0.29276415922072807, "grad_norm": 1.5772103071212769, "learning_rate": 0.00016592168131933144, "loss": 1.519, "step": 8175 }, { "epoch": 0.29279997135029634, "grad_norm": 2.622490406036377, "learning_rate": 0.00016591295896426476, "loss": 1.2288, "step": 8176 }, { "epoch": 0.2928357834798646, "grad_norm": 1.7737879753112793, "learning_rate": 0.00016590423572242422, "loss": 1.5786, "step": 8177 }, { "epoch": 0.29287159560943293, "grad_norm": 1.4796202182769775, "learning_rate": 0.0001658955115939272, "loss": 1.3456, "step": 8178 }, { "epoch": 0.2929074077390012, "grad_norm": 2.105363368988037, "learning_rate": 0.00016588678657889112, "loss": 1.6187, "step": 8179 }, { "epoch": 0.29294321986856947, "grad_norm": 1.5410113334655762, "learning_rate": 0.00016587806067743327, "loss": 2.0069, "step": 8180 }, { "epoch": 0.2929790319981378, "grad_norm": 1.7439554929733276, "learning_rate": 0.00016586933388967109, "loss": 1.3726, "step": 8181 }, { "epoch": 0.29301484412770606, "grad_norm": 2.0335540771484375, "learning_rate": 0.000165860606215722, "loss": 1.5939, "step": 8182 }, { "epoch": 0.29305065625727433, "grad_norm": 1.8200868368148804, "learning_rate": 0.0001658518776557034, "loss": 1.2275, "step": 8183 }, { "epoch": 0.2930864683868426, "grad_norm": 1.4193452596664429, "learning_rate": 0.00016584314820973273, "loss": 1.4636, "step": 8184 }, { "epoch": 0.2931222805164109, "grad_norm": 1.3906633853912354, "learning_rate": 0.00016583441787792745, "loss": 1.3655, "step": 8185 }, { "epoch": 0.2931580926459792, "grad_norm": 1.5661295652389526, "learning_rate": 0.00016582568666040497, "loss": 1.8314, "step": 8186 }, { "epoch": 0.29319390477554746, "grad_norm": 1.2787010669708252, "learning_rate": 0.0001658169545572828, "loss": 1.4975, "step": 8187 }, { "epoch": 0.2932297169051158, "grad_norm": 1.9857454299926758, "learning_rate": 0.0001658082215686784, "loss": 1.3856, "step": 8188 }, { "epoch": 0.29326552903468406, "grad_norm": 1.7901902198791504, "learning_rate": 0.00016579948769470927, "loss": 1.8455, "step": 8189 }, { "epoch": 0.2933013411642523, "grad_norm": 1.7982251644134521, "learning_rate": 0.00016579075293549292, "loss": 1.3959, "step": 8190 }, { "epoch": 0.2933371532938206, "grad_norm": 1.3840776681900024, "learning_rate": 0.00016578201729114682, "loss": 1.5659, "step": 8191 }, { "epoch": 0.2933729654233889, "grad_norm": 1.91280996799469, "learning_rate": 0.00016577328076178855, "loss": 1.604, "step": 8192 }, { "epoch": 0.2934087775529572, "grad_norm": 1.5600473880767822, "learning_rate": 0.0001657645433475356, "loss": 1.3662, "step": 8193 }, { "epoch": 0.29344458968252546, "grad_norm": 1.603729248046875, "learning_rate": 0.0001657558050485056, "loss": 1.3253, "step": 8194 }, { "epoch": 0.2934804018120938, "grad_norm": 1.883900761604309, "learning_rate": 0.00016574706586481607, "loss": 1.2011, "step": 8195 }, { "epoch": 0.29351621394166205, "grad_norm": 2.2101593017578125, "learning_rate": 0.0001657383257965845, "loss": 1.7439, "step": 8196 }, { "epoch": 0.2935520260712303, "grad_norm": 2.143193006515503, "learning_rate": 0.0001657295848439286, "loss": 1.2733, "step": 8197 }, { "epoch": 0.2935878382007986, "grad_norm": 1.8261759281158447, "learning_rate": 0.00016572084300696594, "loss": 1.6156, "step": 8198 }, { "epoch": 0.2936236503303669, "grad_norm": 2.3701932430267334, "learning_rate": 0.0001657121002858141, "loss": 1.8107, "step": 8199 }, { "epoch": 0.2936594624599352, "grad_norm": 1.553908109664917, "learning_rate": 0.0001657033566805907, "loss": 1.2718, "step": 8200 }, { "epoch": 0.29369527458950345, "grad_norm": 1.8061621189117432, "learning_rate": 0.00016569461219141337, "loss": 1.8474, "step": 8201 }, { "epoch": 0.2937310867190718, "grad_norm": 3.017632484436035, "learning_rate": 0.00016568586681839982, "loss": 1.5226, "step": 8202 }, { "epoch": 0.29376689884864005, "grad_norm": 1.9157863855361938, "learning_rate": 0.00016567712056166762, "loss": 1.4055, "step": 8203 }, { "epoch": 0.2938027109782083, "grad_norm": 1.4582178592681885, "learning_rate": 0.0001656683734213345, "loss": 1.6516, "step": 8204 }, { "epoch": 0.2938385231077766, "grad_norm": 1.641868233680725, "learning_rate": 0.00016565962539751808, "loss": 1.6904, "step": 8205 }, { "epoch": 0.2938743352373449, "grad_norm": 1.7872939109802246, "learning_rate": 0.00016565087649033614, "loss": 1.3973, "step": 8206 }, { "epoch": 0.2939101473669132, "grad_norm": 1.8729043006896973, "learning_rate": 0.00016564212669990634, "loss": 1.4836, "step": 8207 }, { "epoch": 0.29394595949648145, "grad_norm": 1.4845998287200928, "learning_rate": 0.00016563337602634642, "loss": 1.6219, "step": 8208 }, { "epoch": 0.29398177162604977, "grad_norm": 1.3829447031021118, "learning_rate": 0.00016562462446977403, "loss": 1.5805, "step": 8209 }, { "epoch": 0.29401758375561804, "grad_norm": 1.5662988424301147, "learning_rate": 0.000165615872030307, "loss": 1.4469, "step": 8210 }, { "epoch": 0.2940533958851863, "grad_norm": 1.8096336126327515, "learning_rate": 0.00016560711870806303, "loss": 1.6272, "step": 8211 }, { "epoch": 0.2940892080147546, "grad_norm": 1.8783751726150513, "learning_rate": 0.00016559836450315992, "loss": 1.5174, "step": 8212 }, { "epoch": 0.2941250201443229, "grad_norm": 1.8585585355758667, "learning_rate": 0.00016558960941571543, "loss": 1.482, "step": 8213 }, { "epoch": 0.29416083227389117, "grad_norm": 1.624119758605957, "learning_rate": 0.00016558085344584736, "loss": 1.2782, "step": 8214 }, { "epoch": 0.29419664440345944, "grad_norm": 1.2839974164962769, "learning_rate": 0.00016557209659367347, "loss": 1.5674, "step": 8215 }, { "epoch": 0.29423245653302776, "grad_norm": 2.2805612087249756, "learning_rate": 0.00016556333885931162, "loss": 1.6619, "step": 8216 }, { "epoch": 0.29426826866259603, "grad_norm": 1.5599788427352905, "learning_rate": 0.00016555458024287964, "loss": 1.7332, "step": 8217 }, { "epoch": 0.2943040807921643, "grad_norm": 2.071613311767578, "learning_rate": 0.0001655458207444953, "loss": 1.7957, "step": 8218 }, { "epoch": 0.29433989292173257, "grad_norm": 1.492445945739746, "learning_rate": 0.0001655370603642765, "loss": 1.5455, "step": 8219 }, { "epoch": 0.2943757050513009, "grad_norm": 1.4632467031478882, "learning_rate": 0.0001655282991023411, "loss": 1.5157, "step": 8220 }, { "epoch": 0.29441151718086916, "grad_norm": 2.3645126819610596, "learning_rate": 0.000165519536958807, "loss": 1.7148, "step": 8221 }, { "epoch": 0.29444732931043743, "grad_norm": 1.6779077053070068, "learning_rate": 0.000165510773933792, "loss": 1.3695, "step": 8222 }, { "epoch": 0.2944831414400057, "grad_norm": 1.7853635549545288, "learning_rate": 0.00016550201002741403, "loss": 1.5484, "step": 8223 }, { "epoch": 0.294518953569574, "grad_norm": 2.639172077178955, "learning_rate": 0.00016549324523979102, "loss": 1.8244, "step": 8224 }, { "epoch": 0.2945547656991423, "grad_norm": 1.921919345855713, "learning_rate": 0.0001654844795710409, "loss": 1.7747, "step": 8225 }, { "epoch": 0.29459057782871056, "grad_norm": 1.9929426908493042, "learning_rate": 0.00016547571302128153, "loss": 1.6522, "step": 8226 }, { "epoch": 0.2946263899582789, "grad_norm": 1.6712838411331177, "learning_rate": 0.00016546694559063093, "loss": 1.5273, "step": 8227 }, { "epoch": 0.29466220208784716, "grad_norm": 1.7256261110305786, "learning_rate": 0.000165458177279207, "loss": 1.7917, "step": 8228 }, { "epoch": 0.2946980142174154, "grad_norm": 1.6912678480148315, "learning_rate": 0.00016544940808712775, "loss": 1.5481, "step": 8229 }, { "epoch": 0.2947338263469837, "grad_norm": 1.7609024047851562, "learning_rate": 0.00016544063801451114, "loss": 1.7015, "step": 8230 }, { "epoch": 0.294769638476552, "grad_norm": 2.914182662963867, "learning_rate": 0.00016543186706147514, "loss": 1.8822, "step": 8231 }, { "epoch": 0.2948054506061203, "grad_norm": 1.4983017444610596, "learning_rate": 0.00016542309522813779, "loss": 1.5277, "step": 8232 }, { "epoch": 0.29484126273568856, "grad_norm": 1.6739991903305054, "learning_rate": 0.00016541432251461705, "loss": 1.5439, "step": 8233 }, { "epoch": 0.2948770748652569, "grad_norm": 1.3391531705856323, "learning_rate": 0.000165405548921031, "loss": 1.3658, "step": 8234 }, { "epoch": 0.29491288699482515, "grad_norm": 1.941439151763916, "learning_rate": 0.0001653967744474977, "loss": 1.2919, "step": 8235 }, { "epoch": 0.2949486991243934, "grad_norm": 1.8408081531524658, "learning_rate": 0.00016538799909413508, "loss": 1.4787, "step": 8236 }, { "epoch": 0.2949845112539617, "grad_norm": 1.8048700094223022, "learning_rate": 0.00016537922286106134, "loss": 1.5778, "step": 8237 }, { "epoch": 0.29502032338353, "grad_norm": 2.9586312770843506, "learning_rate": 0.00016537044574839444, "loss": 1.5999, "step": 8238 }, { "epoch": 0.2950561355130983, "grad_norm": 1.534470796585083, "learning_rate": 0.00016536166775625252, "loss": 1.7927, "step": 8239 }, { "epoch": 0.29509194764266655, "grad_norm": 1.389212965965271, "learning_rate": 0.0001653528888847537, "loss": 1.4612, "step": 8240 }, { "epoch": 0.2951277597722349, "grad_norm": 1.5178078413009644, "learning_rate": 0.00016534410913401603, "loss": 1.491, "step": 8241 }, { "epoch": 0.29516357190180315, "grad_norm": 2.1112802028656006, "learning_rate": 0.0001653353285041577, "loss": 1.4394, "step": 8242 }, { "epoch": 0.2951993840313714, "grad_norm": 1.57144296169281, "learning_rate": 0.00016532654699529678, "loss": 1.7182, "step": 8243 }, { "epoch": 0.2952351961609397, "grad_norm": 1.8326722383499146, "learning_rate": 0.00016531776460755143, "loss": 1.4076, "step": 8244 }, { "epoch": 0.295271008290508, "grad_norm": 1.9306200742721558, "learning_rate": 0.0001653089813410398, "loss": 1.6902, "step": 8245 }, { "epoch": 0.2953068204200763, "grad_norm": 1.3576823472976685, "learning_rate": 0.00016530019719588007, "loss": 1.4347, "step": 8246 }, { "epoch": 0.29534263254964455, "grad_norm": 1.4128303527832031, "learning_rate": 0.00016529141217219045, "loss": 1.6012, "step": 8247 }, { "epoch": 0.29537844467921287, "grad_norm": 1.5558239221572876, "learning_rate": 0.00016528262627008906, "loss": 1.4238, "step": 8248 }, { "epoch": 0.29541425680878114, "grad_norm": 1.8013745546340942, "learning_rate": 0.00016527383948969416, "loss": 1.7537, "step": 8249 }, { "epoch": 0.2954500689383494, "grad_norm": 1.5508323907852173, "learning_rate": 0.00016526505183112394, "loss": 1.6048, "step": 8250 }, { "epoch": 0.2954858810679177, "grad_norm": 1.7320663928985596, "learning_rate": 0.00016525626329449668, "loss": 1.7845, "step": 8251 }, { "epoch": 0.295521693197486, "grad_norm": 1.934444785118103, "learning_rate": 0.0001652474738799305, "loss": 1.6607, "step": 8252 }, { "epoch": 0.29555750532705427, "grad_norm": 1.4806501865386963, "learning_rate": 0.00016523868358754378, "loss": 1.5327, "step": 8253 }, { "epoch": 0.29559331745662254, "grad_norm": 2.4541170597076416, "learning_rate": 0.00016522989241745469, "loss": 1.4625, "step": 8254 }, { "epoch": 0.29562912958619086, "grad_norm": 1.7268229722976685, "learning_rate": 0.00016522110036978153, "loss": 1.3799, "step": 8255 }, { "epoch": 0.29566494171575913, "grad_norm": 2.0476462841033936, "learning_rate": 0.0001652123074446426, "loss": 1.7806, "step": 8256 }, { "epoch": 0.2957007538453274, "grad_norm": 1.8949542045593262, "learning_rate": 0.00016520351364215623, "loss": 1.8599, "step": 8257 }, { "epoch": 0.29573656597489567, "grad_norm": 1.6571156978607178, "learning_rate": 0.00016519471896244063, "loss": 1.5826, "step": 8258 }, { "epoch": 0.295772378104464, "grad_norm": 1.580122947692871, "learning_rate": 0.00016518592340561422, "loss": 1.45, "step": 8259 }, { "epoch": 0.29580819023403226, "grad_norm": 1.4408222436904907, "learning_rate": 0.0001651771269717953, "loss": 1.3734, "step": 8260 }, { "epoch": 0.29584400236360053, "grad_norm": 1.6053831577301025, "learning_rate": 0.0001651683296611022, "loss": 1.7505, "step": 8261 }, { "epoch": 0.29587981449316886, "grad_norm": 1.855286717414856, "learning_rate": 0.0001651595314736533, "loss": 1.4902, "step": 8262 }, { "epoch": 0.2959156266227371, "grad_norm": 2.0580155849456787, "learning_rate": 0.00016515073240956692, "loss": 1.6515, "step": 8263 }, { "epoch": 0.2959514387523054, "grad_norm": 1.4917335510253906, "learning_rate": 0.0001651419324689615, "loss": 1.5483, "step": 8264 }, { "epoch": 0.29598725088187366, "grad_norm": 1.6483070850372314, "learning_rate": 0.00016513313165195538, "loss": 1.4299, "step": 8265 }, { "epoch": 0.296023063011442, "grad_norm": 1.8228284120559692, "learning_rate": 0.00016512432995866702, "loss": 1.4021, "step": 8266 }, { "epoch": 0.29605887514101026, "grad_norm": 1.4714165925979614, "learning_rate": 0.00016511552738921479, "loss": 1.7201, "step": 8267 }, { "epoch": 0.2960946872705785, "grad_norm": 1.59091317653656, "learning_rate": 0.0001651067239437171, "loss": 1.686, "step": 8268 }, { "epoch": 0.29613049940014685, "grad_norm": 1.409410834312439, "learning_rate": 0.00016509791962229247, "loss": 1.5451, "step": 8269 }, { "epoch": 0.2961663115297151, "grad_norm": 1.9544563293457031, "learning_rate": 0.0001650891144250593, "loss": 1.6566, "step": 8270 }, { "epoch": 0.2962021236592834, "grad_norm": 1.4585261344909668, "learning_rate": 0.00016508030835213605, "loss": 1.4464, "step": 8271 }, { "epoch": 0.29623793578885166, "grad_norm": 1.7657428979873657, "learning_rate": 0.00016507150140364116, "loss": 1.4297, "step": 8272 }, { "epoch": 0.29627374791842, "grad_norm": 1.381548285484314, "learning_rate": 0.0001650626935796932, "loss": 1.5166, "step": 8273 }, { "epoch": 0.29630956004798825, "grad_norm": 1.7373437881469727, "learning_rate": 0.00016505388488041058, "loss": 1.5385, "step": 8274 }, { "epoch": 0.2963453721775565, "grad_norm": 1.6844048500061035, "learning_rate": 0.0001650450753059119, "loss": 1.4545, "step": 8275 }, { "epoch": 0.29638118430712485, "grad_norm": 1.7241437435150146, "learning_rate": 0.00016503626485631561, "loss": 1.5535, "step": 8276 }, { "epoch": 0.2964169964366931, "grad_norm": 1.5121371746063232, "learning_rate": 0.00016502745353174026, "loss": 1.7588, "step": 8277 }, { "epoch": 0.2964528085662614, "grad_norm": 2.0365071296691895, "learning_rate": 0.0001650186413323044, "loss": 1.7113, "step": 8278 }, { "epoch": 0.29648862069582965, "grad_norm": 1.6844797134399414, "learning_rate": 0.0001650098282581266, "loss": 1.3356, "step": 8279 }, { "epoch": 0.296524432825398, "grad_norm": 1.9010940790176392, "learning_rate": 0.00016500101430932541, "loss": 1.7213, "step": 8280 }, { "epoch": 0.29656024495496625, "grad_norm": 1.565537452697754, "learning_rate": 0.00016499219948601943, "loss": 1.6433, "step": 8281 }, { "epoch": 0.2965960570845345, "grad_norm": 1.7080549001693726, "learning_rate": 0.00016498338378832724, "loss": 1.5829, "step": 8282 }, { "epoch": 0.29663186921410284, "grad_norm": 1.454037070274353, "learning_rate": 0.00016497456721636743, "loss": 1.5696, "step": 8283 }, { "epoch": 0.2966676813436711, "grad_norm": 1.5883538722991943, "learning_rate": 0.00016496574977025862, "loss": 1.5431, "step": 8284 }, { "epoch": 0.2967034934732394, "grad_norm": 1.2771259546279907, "learning_rate": 0.00016495693145011947, "loss": 1.2164, "step": 8285 }, { "epoch": 0.29673930560280765, "grad_norm": 1.6104718446731567, "learning_rate": 0.00016494811225606858, "loss": 1.3776, "step": 8286 }, { "epoch": 0.29677511773237597, "grad_norm": 1.967247724533081, "learning_rate": 0.00016493929218822467, "loss": 1.2856, "step": 8287 }, { "epoch": 0.29681092986194424, "grad_norm": 1.7795873880386353, "learning_rate": 0.0001649304712467063, "loss": 1.4519, "step": 8288 }, { "epoch": 0.2968467419915125, "grad_norm": 2.0310049057006836, "learning_rate": 0.00016492164943163217, "loss": 1.5672, "step": 8289 }, { "epoch": 0.29688255412108083, "grad_norm": 1.3638725280761719, "learning_rate": 0.00016491282674312103, "loss": 1.5978, "step": 8290 }, { "epoch": 0.2969183662506491, "grad_norm": 1.5186011791229248, "learning_rate": 0.00016490400318129153, "loss": 1.3685, "step": 8291 }, { "epoch": 0.29695417838021737, "grad_norm": 1.4526851177215576, "learning_rate": 0.0001648951787462624, "loss": 1.5908, "step": 8292 }, { "epoch": 0.29698999050978564, "grad_norm": 2.9446048736572266, "learning_rate": 0.0001648863534381523, "loss": 1.8091, "step": 8293 }, { "epoch": 0.29702580263935396, "grad_norm": 1.383347511291504, "learning_rate": 0.00016487752725708005, "loss": 1.5765, "step": 8294 }, { "epoch": 0.29706161476892223, "grad_norm": 1.7374597787857056, "learning_rate": 0.00016486870020316437, "loss": 1.4451, "step": 8295 }, { "epoch": 0.2970974268984905, "grad_norm": 1.5283315181732178, "learning_rate": 0.000164859872276524, "loss": 1.5174, "step": 8296 }, { "epoch": 0.2971332390280588, "grad_norm": 1.4629818201065063, "learning_rate": 0.0001648510434772777, "loss": 1.5145, "step": 8297 }, { "epoch": 0.2971690511576271, "grad_norm": 1.7839181423187256, "learning_rate": 0.00016484221380554424, "loss": 1.4526, "step": 8298 }, { "epoch": 0.29720486328719536, "grad_norm": 1.8995558023452759, "learning_rate": 0.00016483338326144244, "loss": 1.5883, "step": 8299 }, { "epoch": 0.29724067541676363, "grad_norm": 1.3919588327407837, "learning_rate": 0.0001648245518450911, "loss": 1.2238, "step": 8300 }, { "epoch": 0.29727648754633196, "grad_norm": 1.8809860944747925, "learning_rate": 0.00016481571955660903, "loss": 1.6602, "step": 8301 }, { "epoch": 0.2973122996759002, "grad_norm": 1.5680149793624878, "learning_rate": 0.0001648068863961151, "loss": 1.4787, "step": 8302 }, { "epoch": 0.2973481118054685, "grad_norm": 1.533979892730713, "learning_rate": 0.00016479805236372806, "loss": 1.3545, "step": 8303 }, { "epoch": 0.2973839239350368, "grad_norm": 1.733001708984375, "learning_rate": 0.00016478921745956686, "loss": 1.7839, "step": 8304 }, { "epoch": 0.2974197360646051, "grad_norm": 2.160454750061035, "learning_rate": 0.00016478038168375028, "loss": 1.7761, "step": 8305 }, { "epoch": 0.29745554819417336, "grad_norm": 1.3565226793289185, "learning_rate": 0.00016477154503639723, "loss": 1.5211, "step": 8306 }, { "epoch": 0.2974913603237416, "grad_norm": 1.4650872945785522, "learning_rate": 0.00016476270751762656, "loss": 1.1181, "step": 8307 }, { "epoch": 0.29752717245330995, "grad_norm": 1.9036059379577637, "learning_rate": 0.00016475386912755724, "loss": 1.4345, "step": 8308 }, { "epoch": 0.2975629845828782, "grad_norm": 2.0247349739074707, "learning_rate": 0.0001647450298663081, "loss": 1.7541, "step": 8309 }, { "epoch": 0.2975987967124465, "grad_norm": 1.7385376691818237, "learning_rate": 0.00016473618973399811, "loss": 1.5686, "step": 8310 }, { "epoch": 0.2976346088420148, "grad_norm": 1.8126300573349, "learning_rate": 0.00016472734873074622, "loss": 1.8558, "step": 8311 }, { "epoch": 0.2976704209715831, "grad_norm": 1.5259616374969482, "learning_rate": 0.00016471850685667133, "loss": 1.4215, "step": 8312 }, { "epoch": 0.29770623310115135, "grad_norm": 1.6709272861480713, "learning_rate": 0.0001647096641118924, "loss": 1.5873, "step": 8313 }, { "epoch": 0.2977420452307196, "grad_norm": 1.7461010217666626, "learning_rate": 0.00016470082049652843, "loss": 1.579, "step": 8314 }, { "epoch": 0.29777785736028795, "grad_norm": 1.4879776239395142, "learning_rate": 0.00016469197601069838, "loss": 1.4497, "step": 8315 }, { "epoch": 0.2978136694898562, "grad_norm": 1.4053375720977783, "learning_rate": 0.00016468313065452121, "loss": 1.5219, "step": 8316 }, { "epoch": 0.2978494816194245, "grad_norm": 1.5887116193771362, "learning_rate": 0.00016467428442811595, "loss": 1.3375, "step": 8317 }, { "epoch": 0.2978852937489928, "grad_norm": 2.0717251300811768, "learning_rate": 0.00016466543733160163, "loss": 1.7047, "step": 8318 }, { "epoch": 0.2979211058785611, "grad_norm": 1.4569612741470337, "learning_rate": 0.00016465658936509726, "loss": 1.4269, "step": 8319 }, { "epoch": 0.29795691800812935, "grad_norm": 1.4222359657287598, "learning_rate": 0.0001646477405287219, "loss": 1.5088, "step": 8320 }, { "epoch": 0.2979927301376976, "grad_norm": 1.5125815868377686, "learning_rate": 0.00016463889082259456, "loss": 1.5915, "step": 8321 }, { "epoch": 0.29802854226726594, "grad_norm": 1.9704197645187378, "learning_rate": 0.00016463004024683432, "loss": 1.6187, "step": 8322 }, { "epoch": 0.2980643543968342, "grad_norm": 1.6360664367675781, "learning_rate": 0.0001646211888015603, "loss": 1.3247, "step": 8323 }, { "epoch": 0.2981001665264025, "grad_norm": 1.4352376461029053, "learning_rate": 0.0001646123364868915, "loss": 1.3175, "step": 8324 }, { "epoch": 0.2981359786559708, "grad_norm": 1.6311535835266113, "learning_rate": 0.00016460348330294704, "loss": 1.2099, "step": 8325 }, { "epoch": 0.29817179078553907, "grad_norm": 1.5380768775939941, "learning_rate": 0.00016459462924984605, "loss": 1.782, "step": 8326 }, { "epoch": 0.29820760291510734, "grad_norm": 1.4307458400726318, "learning_rate": 0.00016458577432770766, "loss": 1.5807, "step": 8327 }, { "epoch": 0.2982434150446756, "grad_norm": 1.925248384475708, "learning_rate": 0.000164576918536651, "loss": 1.4601, "step": 8328 }, { "epoch": 0.29827922717424393, "grad_norm": 1.3756663799285889, "learning_rate": 0.0001645680618767952, "loss": 1.487, "step": 8329 }, { "epoch": 0.2983150393038122, "grad_norm": 1.7213997840881348, "learning_rate": 0.00016455920434825936, "loss": 1.681, "step": 8330 }, { "epoch": 0.29835085143338047, "grad_norm": 1.3830705881118774, "learning_rate": 0.00016455034595116278, "loss": 1.3369, "step": 8331 }, { "epoch": 0.2983866635629488, "grad_norm": 1.7282135486602783, "learning_rate": 0.00016454148668562454, "loss": 1.1979, "step": 8332 }, { "epoch": 0.29842247569251706, "grad_norm": 2.0980710983276367, "learning_rate": 0.0001645326265517638, "loss": 1.4047, "step": 8333 }, { "epoch": 0.29845828782208533, "grad_norm": 1.5762666463851929, "learning_rate": 0.00016452376554969983, "loss": 1.7234, "step": 8334 }, { "epoch": 0.2984940999516536, "grad_norm": 1.630379319190979, "learning_rate": 0.00016451490367955183, "loss": 1.5907, "step": 8335 }, { "epoch": 0.2985299120812219, "grad_norm": 2.5405523777008057, "learning_rate": 0.00016450604094143904, "loss": 1.6051, "step": 8336 }, { "epoch": 0.2985657242107902, "grad_norm": 1.3858790397644043, "learning_rate": 0.00016449717733548066, "loss": 1.5432, "step": 8337 }, { "epoch": 0.29860153634035846, "grad_norm": 1.749637246131897, "learning_rate": 0.00016448831286179595, "loss": 1.6572, "step": 8338 }, { "epoch": 0.2986373484699268, "grad_norm": 1.4837186336517334, "learning_rate": 0.00016447944752050417, "loss": 1.552, "step": 8339 }, { "epoch": 0.29867316059949506, "grad_norm": 1.4648246765136719, "learning_rate": 0.00016447058131172462, "loss": 1.4813, "step": 8340 }, { "epoch": 0.2987089727290633, "grad_norm": 1.9088497161865234, "learning_rate": 0.00016446171423557652, "loss": 1.6785, "step": 8341 }, { "epoch": 0.2987447848586316, "grad_norm": 1.8436561822891235, "learning_rate": 0.00016445284629217923, "loss": 1.6028, "step": 8342 }, { "epoch": 0.2987805969881999, "grad_norm": 1.7736629247665405, "learning_rate": 0.00016444397748165205, "loss": 1.672, "step": 8343 }, { "epoch": 0.2988164091177682, "grad_norm": 1.4750014543533325, "learning_rate": 0.00016443510780411423, "loss": 1.5656, "step": 8344 }, { "epoch": 0.29885222124733646, "grad_norm": 2.1682565212249756, "learning_rate": 0.0001644262372596852, "loss": 1.507, "step": 8345 }, { "epoch": 0.2988880333769048, "grad_norm": 1.6554548740386963, "learning_rate": 0.00016441736584848422, "loss": 1.4692, "step": 8346 }, { "epoch": 0.29892384550647305, "grad_norm": 1.3574022054672241, "learning_rate": 0.0001644084935706307, "loss": 1.4869, "step": 8347 }, { "epoch": 0.2989596576360413, "grad_norm": 1.7374560832977295, "learning_rate": 0.00016439962042624396, "loss": 1.7313, "step": 8348 }, { "epoch": 0.2989954697656096, "grad_norm": 1.4507265090942383, "learning_rate": 0.0001643907464154434, "loss": 1.569, "step": 8349 }, { "epoch": 0.2990312818951779, "grad_norm": 1.6869750022888184, "learning_rate": 0.00016438187153834842, "loss": 1.5569, "step": 8350 }, { "epoch": 0.2990670940247462, "grad_norm": 1.4608795642852783, "learning_rate": 0.0001643729957950784, "loss": 1.5137, "step": 8351 }, { "epoch": 0.29910290615431445, "grad_norm": 1.5899919271469116, "learning_rate": 0.00016436411918575275, "loss": 1.4346, "step": 8352 }, { "epoch": 0.2991387182838828, "grad_norm": 1.7465403079986572, "learning_rate": 0.00016435524171049088, "loss": 1.8228, "step": 8353 }, { "epoch": 0.29917453041345105, "grad_norm": 1.4676436185836792, "learning_rate": 0.00016434636336941228, "loss": 1.9146, "step": 8354 }, { "epoch": 0.2992103425430193, "grad_norm": 1.4257675409317017, "learning_rate": 0.00016433748416263633, "loss": 1.7837, "step": 8355 }, { "epoch": 0.2992461546725876, "grad_norm": 2.4013359546661377, "learning_rate": 0.00016432860409028253, "loss": 1.7198, "step": 8356 }, { "epoch": 0.2992819668021559, "grad_norm": 1.3581948280334473, "learning_rate": 0.00016431972315247037, "loss": 1.3561, "step": 8357 }, { "epoch": 0.2993177789317242, "grad_norm": 1.4917012453079224, "learning_rate": 0.00016431084134931927, "loss": 1.789, "step": 8358 }, { "epoch": 0.29935359106129245, "grad_norm": 1.5227102041244507, "learning_rate": 0.00016430195868094875, "loss": 1.6315, "step": 8359 }, { "epoch": 0.29938940319086077, "grad_norm": 1.2591630220413208, "learning_rate": 0.00016429307514747834, "loss": 1.6123, "step": 8360 }, { "epoch": 0.29942521532042904, "grad_norm": 1.3538967370986938, "learning_rate": 0.00016428419074902752, "loss": 1.4282, "step": 8361 }, { "epoch": 0.2994610274499973, "grad_norm": 2.2829110622406006, "learning_rate": 0.00016427530548571585, "loss": 1.5408, "step": 8362 }, { "epoch": 0.2994968395795656, "grad_norm": 1.5202713012695312, "learning_rate": 0.00016426641935766284, "loss": 1.5582, "step": 8363 }, { "epoch": 0.2995326517091339, "grad_norm": 1.6621960401535034, "learning_rate": 0.00016425753236498807, "loss": 1.3942, "step": 8364 }, { "epoch": 0.29956846383870217, "grad_norm": 1.912682294845581, "learning_rate": 0.00016424864450781108, "loss": 1.6684, "step": 8365 }, { "epoch": 0.29960427596827044, "grad_norm": 1.3282166719436646, "learning_rate": 0.00016423975578625142, "loss": 1.5807, "step": 8366 }, { "epoch": 0.29964008809783876, "grad_norm": 1.7995824813842773, "learning_rate": 0.00016423086620042879, "loss": 1.4495, "step": 8367 }, { "epoch": 0.29967590022740703, "grad_norm": 1.258223056793213, "learning_rate": 0.00016422197575046265, "loss": 1.7473, "step": 8368 }, { "epoch": 0.2997117123569753, "grad_norm": 1.346404790878296, "learning_rate": 0.00016421308443647265, "loss": 1.4512, "step": 8369 }, { "epoch": 0.29974752448654357, "grad_norm": 1.8389484882354736, "learning_rate": 0.00016420419225857846, "loss": 1.5317, "step": 8370 }, { "epoch": 0.2997833366161119, "grad_norm": 1.1908183097839355, "learning_rate": 0.00016419529921689967, "loss": 1.2523, "step": 8371 }, { "epoch": 0.29981914874568016, "grad_norm": 1.4290704727172852, "learning_rate": 0.00016418640531155597, "loss": 1.552, "step": 8372 }, { "epoch": 0.29985496087524843, "grad_norm": 1.5900216102600098, "learning_rate": 0.00016417751054266692, "loss": 1.4178, "step": 8373 }, { "epoch": 0.29989077300481676, "grad_norm": 1.5539711713790894, "learning_rate": 0.00016416861491035228, "loss": 1.1549, "step": 8374 }, { "epoch": 0.299926585134385, "grad_norm": 1.5073728561401367, "learning_rate": 0.0001641597184147317, "loss": 1.4495, "step": 8375 }, { "epoch": 0.2999623972639533, "grad_norm": 1.420148491859436, "learning_rate": 0.0001641508210559249, "loss": 1.381, "step": 8376 }, { "epoch": 0.29999820939352156, "grad_norm": 1.7865599393844604, "learning_rate": 0.00016414192283405147, "loss": 1.3243, "step": 8377 }, { "epoch": 0.3000340215230899, "grad_norm": 2.0387165546417236, "learning_rate": 0.00016413302374923124, "loss": 1.6688, "step": 8378 }, { "epoch": 0.30006983365265816, "grad_norm": 1.510023832321167, "learning_rate": 0.00016412412380158392, "loss": 1.726, "step": 8379 }, { "epoch": 0.3001056457822264, "grad_norm": 1.5913726091384888, "learning_rate": 0.00016411522299122924, "loss": 1.7165, "step": 8380 }, { "epoch": 0.30014145791179475, "grad_norm": 1.7233575582504272, "learning_rate": 0.0001641063213182869, "loss": 1.5722, "step": 8381 }, { "epoch": 0.300177270041363, "grad_norm": 1.4803311824798584, "learning_rate": 0.00016409741878287671, "loss": 1.2891, "step": 8382 }, { "epoch": 0.3002130821709313, "grad_norm": 2.3403327465057373, "learning_rate": 0.00016408851538511846, "loss": 1.494, "step": 8383 }, { "epoch": 0.30024889430049956, "grad_norm": 1.5356950759887695, "learning_rate": 0.0001640796111251319, "loss": 1.2206, "step": 8384 }, { "epoch": 0.3002847064300679, "grad_norm": 1.619539737701416, "learning_rate": 0.0001640707060030368, "loss": 1.5673, "step": 8385 }, { "epoch": 0.30032051855963615, "grad_norm": 1.3473477363586426, "learning_rate": 0.00016406180001895298, "loss": 1.7308, "step": 8386 }, { "epoch": 0.3003563306892044, "grad_norm": 1.37346351146698, "learning_rate": 0.00016405289317300033, "loss": 1.307, "step": 8387 }, { "epoch": 0.30039214281877274, "grad_norm": 1.9907050132751465, "learning_rate": 0.00016404398546529859, "loss": 1.7465, "step": 8388 }, { "epoch": 0.300427954948341, "grad_norm": 1.3930773735046387, "learning_rate": 0.00016403507689596763, "loss": 1.4954, "step": 8389 }, { "epoch": 0.3004637670779093, "grad_norm": 2.656479835510254, "learning_rate": 0.0001640261674651273, "loss": 1.4997, "step": 8390 }, { "epoch": 0.30049957920747755, "grad_norm": 2.1430234909057617, "learning_rate": 0.0001640172571728975, "loss": 1.6448, "step": 8391 }, { "epoch": 0.3005353913370459, "grad_norm": 1.5094202756881714, "learning_rate": 0.0001640083460193981, "loss": 1.3046, "step": 8392 }, { "epoch": 0.30057120346661415, "grad_norm": 1.5384578704833984, "learning_rate": 0.00016399943400474895, "loss": 1.3601, "step": 8393 }, { "epoch": 0.3006070155961824, "grad_norm": 1.3882454633712769, "learning_rate": 0.00016399052112906994, "loss": 1.3568, "step": 8394 }, { "epoch": 0.30064282772575074, "grad_norm": 1.2879197597503662, "learning_rate": 0.00016398160739248104, "loss": 1.1968, "step": 8395 }, { "epoch": 0.300678639855319, "grad_norm": 1.569235920906067, "learning_rate": 0.00016397269279510215, "loss": 1.4976, "step": 8396 }, { "epoch": 0.3007144519848873, "grad_norm": 1.489635705947876, "learning_rate": 0.00016396377733705317, "loss": 1.4407, "step": 8397 }, { "epoch": 0.30075026411445555, "grad_norm": 2.0916008949279785, "learning_rate": 0.00016395486101845408, "loss": 1.3873, "step": 8398 }, { "epoch": 0.30078607624402387, "grad_norm": 1.9567631483078003, "learning_rate": 0.00016394594383942486, "loss": 1.2718, "step": 8399 }, { "epoch": 0.30082188837359214, "grad_norm": 2.1201071739196777, "learning_rate": 0.00016393702580008542, "loss": 1.3483, "step": 8400 }, { "epoch": 0.3008577005031604, "grad_norm": 1.7831958532333374, "learning_rate": 0.00016392810690055577, "loss": 1.4788, "step": 8401 }, { "epoch": 0.30089351263272873, "grad_norm": 1.9678820371627808, "learning_rate": 0.00016391918714095592, "loss": 1.6395, "step": 8402 }, { "epoch": 0.300929324762297, "grad_norm": 1.531544804573059, "learning_rate": 0.00016391026652140585, "loss": 1.4363, "step": 8403 }, { "epoch": 0.30096513689186527, "grad_norm": 2.5241010189056396, "learning_rate": 0.00016390134504202557, "loss": 1.3736, "step": 8404 }, { "epoch": 0.30100094902143354, "grad_norm": 2.2505462169647217, "learning_rate": 0.00016389242270293514, "loss": 1.1755, "step": 8405 }, { "epoch": 0.30103676115100186, "grad_norm": 1.7323700189590454, "learning_rate": 0.00016388349950425456, "loss": 1.5693, "step": 8406 }, { "epoch": 0.30107257328057013, "grad_norm": 1.761555552482605, "learning_rate": 0.0001638745754461039, "loss": 1.3442, "step": 8407 }, { "epoch": 0.3011083854101384, "grad_norm": 1.8615801334381104, "learning_rate": 0.00016386565052860323, "loss": 1.6013, "step": 8408 }, { "epoch": 0.3011441975397067, "grad_norm": 1.4850836992263794, "learning_rate": 0.00016385672475187262, "loss": 1.4907, "step": 8409 }, { "epoch": 0.301180009669275, "grad_norm": 1.6146981716156006, "learning_rate": 0.00016384779811603214, "loss": 1.5575, "step": 8410 }, { "epoch": 0.30121582179884326, "grad_norm": 1.6145588159561157, "learning_rate": 0.0001638388706212019, "loss": 1.4367, "step": 8411 }, { "epoch": 0.30125163392841153, "grad_norm": 2.3963756561279297, "learning_rate": 0.000163829942267502, "loss": 1.5233, "step": 8412 }, { "epoch": 0.30128744605797986, "grad_norm": 1.9880651235580444, "learning_rate": 0.00016382101305505254, "loss": 1.676, "step": 8413 }, { "epoch": 0.3013232581875481, "grad_norm": 1.5233274698257446, "learning_rate": 0.0001638120829839737, "loss": 1.5215, "step": 8414 }, { "epoch": 0.3013590703171164, "grad_norm": 1.7098640203475952, "learning_rate": 0.00016380315205438554, "loss": 1.3578, "step": 8415 }, { "epoch": 0.3013948824466847, "grad_norm": 2.10298490524292, "learning_rate": 0.00016379422026640831, "loss": 1.5301, "step": 8416 }, { "epoch": 0.301430694576253, "grad_norm": 1.466875433921814, "learning_rate": 0.00016378528762016218, "loss": 1.2364, "step": 8417 }, { "epoch": 0.30146650670582126, "grad_norm": 1.7537953853607178, "learning_rate": 0.00016377635411576723, "loss": 1.5604, "step": 8418 }, { "epoch": 0.3015023188353895, "grad_norm": 2.448864221572876, "learning_rate": 0.00016376741975334368, "loss": 1.6991, "step": 8419 }, { "epoch": 0.30153813096495785, "grad_norm": 2.6458969116210938, "learning_rate": 0.0001637584845330118, "loss": 1.9327, "step": 8420 }, { "epoch": 0.3015739430945261, "grad_norm": 1.5955549478530884, "learning_rate": 0.00016374954845489175, "loss": 1.4872, "step": 8421 }, { "epoch": 0.3016097552240944, "grad_norm": 1.0321791172027588, "learning_rate": 0.00016374061151910372, "loss": 1.2884, "step": 8422 }, { "epoch": 0.30164556735366266, "grad_norm": 1.4548122882843018, "learning_rate": 0.000163731673725768, "loss": 1.3905, "step": 8423 }, { "epoch": 0.301681379483231, "grad_norm": 1.4599908590316772, "learning_rate": 0.00016372273507500481, "loss": 1.637, "step": 8424 }, { "epoch": 0.30171719161279925, "grad_norm": 1.4581354856491089, "learning_rate": 0.00016371379556693442, "loss": 1.4863, "step": 8425 }, { "epoch": 0.3017530037423675, "grad_norm": 1.6916433572769165, "learning_rate": 0.0001637048552016771, "loss": 1.3605, "step": 8426 }, { "epoch": 0.30178881587193584, "grad_norm": 1.710843801498413, "learning_rate": 0.00016369591397935314, "loss": 1.6055, "step": 8427 }, { "epoch": 0.3018246280015041, "grad_norm": 1.4895509481430054, "learning_rate": 0.0001636869719000828, "loss": 1.4946, "step": 8428 }, { "epoch": 0.3018604401310724, "grad_norm": 1.3431249856948853, "learning_rate": 0.0001636780289639864, "loss": 1.5975, "step": 8429 }, { "epoch": 0.30189625226064065, "grad_norm": 1.9175156354904175, "learning_rate": 0.00016366908517118428, "loss": 1.2682, "step": 8430 }, { "epoch": 0.301932064390209, "grad_norm": 1.7841920852661133, "learning_rate": 0.00016366014052179674, "loss": 1.1883, "step": 8431 }, { "epoch": 0.30196787651977725, "grad_norm": 2.296452760696411, "learning_rate": 0.0001636511950159441, "loss": 1.4858, "step": 8432 }, { "epoch": 0.3020036886493455, "grad_norm": 1.7015984058380127, "learning_rate": 0.00016364224865374677, "loss": 1.6448, "step": 8433 }, { "epoch": 0.30203950077891384, "grad_norm": 1.5796960592269897, "learning_rate": 0.00016363330143532508, "loss": 1.5356, "step": 8434 }, { "epoch": 0.3020753129084821, "grad_norm": 1.562314510345459, "learning_rate": 0.00016362435336079938, "loss": 1.503, "step": 8435 }, { "epoch": 0.3021111250380504, "grad_norm": 1.5993971824645996, "learning_rate": 0.00016361540443029008, "loss": 1.7656, "step": 8436 }, { "epoch": 0.30214693716761865, "grad_norm": 2.1478962898254395, "learning_rate": 0.00016360645464391754, "loss": 1.4395, "step": 8437 }, { "epoch": 0.30218274929718697, "grad_norm": 2.219712257385254, "learning_rate": 0.00016359750400180226, "loss": 1.7623, "step": 8438 }, { "epoch": 0.30221856142675524, "grad_norm": 1.7242192029953003, "learning_rate": 0.00016358855250406455, "loss": 1.3607, "step": 8439 }, { "epoch": 0.3022543735563235, "grad_norm": 1.4670357704162598, "learning_rate": 0.0001635796001508249, "loss": 1.6872, "step": 8440 }, { "epoch": 0.30229018568589183, "grad_norm": 1.7151837348937988, "learning_rate": 0.00016357064694220375, "loss": 1.7587, "step": 8441 }, { "epoch": 0.3023259978154601, "grad_norm": 2.2550930976867676, "learning_rate": 0.00016356169287832156, "loss": 1.598, "step": 8442 }, { "epoch": 0.30236180994502837, "grad_norm": 1.5630356073379517, "learning_rate": 0.00016355273795929875, "loss": 1.6793, "step": 8443 }, { "epoch": 0.30239762207459664, "grad_norm": 2.2968051433563232, "learning_rate": 0.00016354378218525584, "loss": 1.9349, "step": 8444 }, { "epoch": 0.30243343420416496, "grad_norm": 1.6883846521377563, "learning_rate": 0.00016353482555631334, "loss": 1.5545, "step": 8445 }, { "epoch": 0.30246924633373323, "grad_norm": 1.5997358560562134, "learning_rate": 0.00016352586807259168, "loss": 1.9512, "step": 8446 }, { "epoch": 0.3025050584633015, "grad_norm": 1.1980454921722412, "learning_rate": 0.00016351690973421138, "loss": 1.5382, "step": 8447 }, { "epoch": 0.3025408705928698, "grad_norm": 1.3863976001739502, "learning_rate": 0.00016350795054129305, "loss": 1.4965, "step": 8448 }, { "epoch": 0.3025766827224381, "grad_norm": 1.4191679954528809, "learning_rate": 0.00016349899049395713, "loss": 1.6027, "step": 8449 }, { "epoch": 0.30261249485200636, "grad_norm": 1.6715854406356812, "learning_rate": 0.0001634900295923242, "loss": 1.7932, "step": 8450 }, { "epoch": 0.30264830698157463, "grad_norm": 1.6888922452926636, "learning_rate": 0.00016348106783651482, "loss": 1.107, "step": 8451 }, { "epoch": 0.30268411911114296, "grad_norm": 2.2722463607788086, "learning_rate": 0.00016347210522664956, "loss": 1.847, "step": 8452 }, { "epoch": 0.3027199312407112, "grad_norm": 1.5517665147781372, "learning_rate": 0.000163463141762849, "loss": 1.4461, "step": 8453 }, { "epoch": 0.3027557433702795, "grad_norm": 1.6976096630096436, "learning_rate": 0.00016345417744523374, "loss": 1.5298, "step": 8454 }, { "epoch": 0.3027915554998478, "grad_norm": 1.631230354309082, "learning_rate": 0.00016344521227392437, "loss": 1.4179, "step": 8455 }, { "epoch": 0.3028273676294161, "grad_norm": 1.2763921022415161, "learning_rate": 0.00016343624624904151, "loss": 1.7387, "step": 8456 }, { "epoch": 0.30286317975898436, "grad_norm": 2.225421667098999, "learning_rate": 0.00016342727937070577, "loss": 1.4409, "step": 8457 }, { "epoch": 0.3028989918885526, "grad_norm": 2.22019100189209, "learning_rate": 0.0001634183116390378, "loss": 1.9976, "step": 8458 }, { "epoch": 0.30293480401812095, "grad_norm": 1.8093554973602295, "learning_rate": 0.00016340934305415823, "loss": 1.7154, "step": 8459 }, { "epoch": 0.3029706161476892, "grad_norm": 1.4893295764923096, "learning_rate": 0.00016340037361618778, "loss": 1.3436, "step": 8460 }, { "epoch": 0.3030064282772575, "grad_norm": 1.99727201461792, "learning_rate": 0.00016339140332524707, "loss": 1.6495, "step": 8461 }, { "epoch": 0.3030422404068258, "grad_norm": 1.6516386270523071, "learning_rate": 0.0001633824321814568, "loss": 1.7346, "step": 8462 }, { "epoch": 0.3030780525363941, "grad_norm": 1.309701919555664, "learning_rate": 0.00016337346018493768, "loss": 1.4401, "step": 8463 }, { "epoch": 0.30311386466596235, "grad_norm": 1.5844017267227173, "learning_rate": 0.00016336448733581037, "loss": 1.7372, "step": 8464 }, { "epoch": 0.3031496767955306, "grad_norm": 1.5818967819213867, "learning_rate": 0.00016335551363419562, "loss": 1.5704, "step": 8465 }, { "epoch": 0.30318548892509894, "grad_norm": 1.4946575164794922, "learning_rate": 0.00016334653908021415, "loss": 1.6854, "step": 8466 }, { "epoch": 0.3032213010546672, "grad_norm": 2.2354283332824707, "learning_rate": 0.00016333756367398674, "loss": 1.9091, "step": 8467 }, { "epoch": 0.3032571131842355, "grad_norm": 1.3000431060791016, "learning_rate": 0.00016332858741563408, "loss": 1.4807, "step": 8468 }, { "epoch": 0.3032929253138038, "grad_norm": 1.4877946376800537, "learning_rate": 0.00016331961030527698, "loss": 1.3538, "step": 8469 }, { "epoch": 0.3033287374433721, "grad_norm": 1.6280548572540283, "learning_rate": 0.00016331063234303618, "loss": 1.6621, "step": 8470 }, { "epoch": 0.30336454957294035, "grad_norm": 1.3985967636108398, "learning_rate": 0.0001633016535290325, "loss": 1.2751, "step": 8471 }, { "epoch": 0.3034003617025086, "grad_norm": 2.0573692321777344, "learning_rate": 0.00016329267386338674, "loss": 1.4004, "step": 8472 }, { "epoch": 0.30343617383207694, "grad_norm": 1.4988259077072144, "learning_rate": 0.0001632836933462197, "loss": 1.2686, "step": 8473 }, { "epoch": 0.3034719859616452, "grad_norm": 1.387237548828125, "learning_rate": 0.00016327471197765216, "loss": 1.4457, "step": 8474 }, { "epoch": 0.3035077980912135, "grad_norm": 2.1457505226135254, "learning_rate": 0.000163265729757805, "loss": 1.6394, "step": 8475 }, { "epoch": 0.3035436102207818, "grad_norm": 1.7140662670135498, "learning_rate": 0.00016325674668679906, "loss": 1.5671, "step": 8476 }, { "epoch": 0.30357942235035007, "grad_norm": 1.7790052890777588, "learning_rate": 0.00016324776276475518, "loss": 1.6923, "step": 8477 }, { "epoch": 0.30361523447991834, "grad_norm": 1.8531421422958374, "learning_rate": 0.0001632387779917943, "loss": 1.6007, "step": 8478 }, { "epoch": 0.3036510466094866, "grad_norm": 1.4344226121902466, "learning_rate": 0.00016322979236803713, "loss": 1.6599, "step": 8479 }, { "epoch": 0.30368685873905493, "grad_norm": 1.5738317966461182, "learning_rate": 0.00016322080589360472, "loss": 1.7954, "step": 8480 }, { "epoch": 0.3037226708686232, "grad_norm": 1.9308953285217285, "learning_rate": 0.0001632118185686179, "loss": 1.4939, "step": 8481 }, { "epoch": 0.30375848299819147, "grad_norm": 1.450674057006836, "learning_rate": 0.0001632028303931976, "loss": 1.4645, "step": 8482 }, { "epoch": 0.3037942951277598, "grad_norm": 1.5372213125228882, "learning_rate": 0.00016319384136746477, "loss": 1.6913, "step": 8483 }, { "epoch": 0.30383010725732806, "grad_norm": 1.1684625148773193, "learning_rate": 0.0001631848514915403, "loss": 1.5842, "step": 8484 }, { "epoch": 0.30386591938689633, "grad_norm": 1.4962623119354248, "learning_rate": 0.00016317586076554515, "loss": 1.6532, "step": 8485 }, { "epoch": 0.3039017315164646, "grad_norm": 1.7394448518753052, "learning_rate": 0.0001631668691896003, "loss": 1.5523, "step": 8486 }, { "epoch": 0.3039375436460329, "grad_norm": 2.025491237640381, "learning_rate": 0.00016315787676382667, "loss": 1.6246, "step": 8487 }, { "epoch": 0.3039733557756012, "grad_norm": 3.2317962646484375, "learning_rate": 0.0001631488834883453, "loss": 1.5146, "step": 8488 }, { "epoch": 0.30400916790516946, "grad_norm": 2.3871772289276123, "learning_rate": 0.00016313988936327717, "loss": 1.7163, "step": 8489 }, { "epoch": 0.3040449800347378, "grad_norm": 1.6114022731781006, "learning_rate": 0.00016313089438874326, "loss": 1.8252, "step": 8490 }, { "epoch": 0.30408079216430606, "grad_norm": 1.964486002922058, "learning_rate": 0.00016312189856486462, "loss": 1.7783, "step": 8491 }, { "epoch": 0.3041166042938743, "grad_norm": 1.4887510538101196, "learning_rate": 0.00016311290189176223, "loss": 1.5578, "step": 8492 }, { "epoch": 0.3041524164234426, "grad_norm": 2.208726167678833, "learning_rate": 0.00016310390436955716, "loss": 1.51, "step": 8493 }, { "epoch": 0.3041882285530109, "grad_norm": 1.4921742677688599, "learning_rate": 0.00016309490599837045, "loss": 1.77, "step": 8494 }, { "epoch": 0.3042240406825792, "grad_norm": 1.6540600061416626, "learning_rate": 0.00016308590677832315, "loss": 1.4772, "step": 8495 }, { "epoch": 0.30425985281214746, "grad_norm": 1.4199429750442505, "learning_rate": 0.0001630769067095364, "loss": 1.4281, "step": 8496 }, { "epoch": 0.3042956649417158, "grad_norm": 2.591513156890869, "learning_rate": 0.0001630679057921312, "loss": 1.4973, "step": 8497 }, { "epoch": 0.30433147707128405, "grad_norm": 1.456068515777588, "learning_rate": 0.0001630589040262287, "loss": 1.4531, "step": 8498 }, { "epoch": 0.3043672892008523, "grad_norm": 2.0415728092193604, "learning_rate": 0.00016304990141194996, "loss": 1.4656, "step": 8499 }, { "epoch": 0.3044031013304206, "grad_norm": 1.6179795265197754, "learning_rate": 0.00016304089794941614, "loss": 1.7035, "step": 8500 }, { "epoch": 0.3044389134599889, "grad_norm": 1.568784475326538, "learning_rate": 0.00016303189363874835, "loss": 1.3761, "step": 8501 }, { "epoch": 0.3044747255895572, "grad_norm": 1.5610696077346802, "learning_rate": 0.00016302288848006776, "loss": 1.3034, "step": 8502 }, { "epoch": 0.30451053771912545, "grad_norm": 2.025500774383545, "learning_rate": 0.00016301388247349545, "loss": 1.4422, "step": 8503 }, { "epoch": 0.3045463498486938, "grad_norm": 1.6458475589752197, "learning_rate": 0.00016300487561915266, "loss": 1.5523, "step": 8504 }, { "epoch": 0.30458216197826204, "grad_norm": 1.8756228685379028, "learning_rate": 0.00016299586791716054, "loss": 1.642, "step": 8505 }, { "epoch": 0.3046179741078303, "grad_norm": 1.4094475507736206, "learning_rate": 0.00016298685936764026, "loss": 1.3843, "step": 8506 }, { "epoch": 0.3046537862373986, "grad_norm": 1.7004481554031372, "learning_rate": 0.00016297784997071308, "loss": 1.4843, "step": 8507 }, { "epoch": 0.3046895983669669, "grad_norm": 1.459661602973938, "learning_rate": 0.00016296883972650013, "loss": 1.4667, "step": 8508 }, { "epoch": 0.3047254104965352, "grad_norm": 1.3875668048858643, "learning_rate": 0.00016295982863512266, "loss": 1.4236, "step": 8509 }, { "epoch": 0.30476122262610345, "grad_norm": 2.15937876701355, "learning_rate": 0.00016295081669670191, "loss": 1.7024, "step": 8510 }, { "epoch": 0.30479703475567177, "grad_norm": 1.7457246780395508, "learning_rate": 0.00016294180391135914, "loss": 1.3893, "step": 8511 }, { "epoch": 0.30483284688524004, "grad_norm": 1.7229722738265991, "learning_rate": 0.00016293279027921557, "loss": 1.4539, "step": 8512 }, { "epoch": 0.3048686590148083, "grad_norm": 2.1370930671691895, "learning_rate": 0.0001629237758003925, "loss": 1.7631, "step": 8513 }, { "epoch": 0.3049044711443766, "grad_norm": 1.49868905544281, "learning_rate": 0.00016291476047501115, "loss": 1.6417, "step": 8514 }, { "epoch": 0.3049402832739449, "grad_norm": 1.5884264707565308, "learning_rate": 0.0001629057443031929, "loss": 1.4924, "step": 8515 }, { "epoch": 0.30497609540351317, "grad_norm": 1.7246391773223877, "learning_rate": 0.000162896727285059, "loss": 1.4056, "step": 8516 }, { "epoch": 0.30501190753308144, "grad_norm": 1.9741127490997314, "learning_rate": 0.00016288770942073075, "loss": 1.6657, "step": 8517 }, { "epoch": 0.30504771966264976, "grad_norm": 1.5608097314834595, "learning_rate": 0.00016287869071032952, "loss": 1.6203, "step": 8518 }, { "epoch": 0.30508353179221803, "grad_norm": 1.6867769956588745, "learning_rate": 0.00016286967115397655, "loss": 1.4592, "step": 8519 }, { "epoch": 0.3051193439217863, "grad_norm": 1.8151174783706665, "learning_rate": 0.00016286065075179332, "loss": 1.7879, "step": 8520 }, { "epoch": 0.30515515605135457, "grad_norm": 2.2028374671936035, "learning_rate": 0.00016285162950390104, "loss": 1.4099, "step": 8521 }, { "epoch": 0.3051909681809229, "grad_norm": 2.5838005542755127, "learning_rate": 0.00016284260741042123, "loss": 1.4674, "step": 8522 }, { "epoch": 0.30522678031049116, "grad_norm": 1.9016218185424805, "learning_rate": 0.00016283358447147516, "loss": 1.8284, "step": 8523 }, { "epoch": 0.30526259244005943, "grad_norm": 2.4954068660736084, "learning_rate": 0.0001628245606871843, "loss": 1.5852, "step": 8524 }, { "epoch": 0.30529840456962776, "grad_norm": 2.0137617588043213, "learning_rate": 0.00016281553605766998, "loss": 1.7631, "step": 8525 }, { "epoch": 0.305334216699196, "grad_norm": 2.0389068126678467, "learning_rate": 0.00016280651058305363, "loss": 1.661, "step": 8526 }, { "epoch": 0.3053700288287643, "grad_norm": 1.6179884672164917, "learning_rate": 0.00016279748426345673, "loss": 1.5391, "step": 8527 }, { "epoch": 0.30540584095833256, "grad_norm": 1.4547618627548218, "learning_rate": 0.0001627884570990007, "loss": 1.6447, "step": 8528 }, { "epoch": 0.3054416530879009, "grad_norm": 1.4951916933059692, "learning_rate": 0.0001627794290898069, "loss": 1.714, "step": 8529 }, { "epoch": 0.30547746521746916, "grad_norm": 1.8673044443130493, "learning_rate": 0.00016277040023599692, "loss": 1.6272, "step": 8530 }, { "epoch": 0.3055132773470374, "grad_norm": 1.5993306636810303, "learning_rate": 0.00016276137053769217, "loss": 1.3022, "step": 8531 }, { "epoch": 0.30554908947660575, "grad_norm": 1.4907748699188232, "learning_rate": 0.0001627523399950141, "loss": 1.606, "step": 8532 }, { "epoch": 0.305584901606174, "grad_norm": 1.347849726676941, "learning_rate": 0.00016274330860808426, "loss": 1.5253, "step": 8533 }, { "epoch": 0.3056207137357423, "grad_norm": 1.296706199645996, "learning_rate": 0.00016273427637702415, "loss": 1.1995, "step": 8534 }, { "epoch": 0.30565652586531056, "grad_norm": 1.5187026262283325, "learning_rate": 0.00016272524330195525, "loss": 1.1413, "step": 8535 }, { "epoch": 0.3056923379948789, "grad_norm": 2.58811616897583, "learning_rate": 0.00016271620938299912, "loss": 1.7734, "step": 8536 }, { "epoch": 0.30572815012444715, "grad_norm": 1.5693305730819702, "learning_rate": 0.0001627071746202773, "loss": 1.6448, "step": 8537 }, { "epoch": 0.3057639622540154, "grad_norm": 1.9932421445846558, "learning_rate": 0.00016269813901391132, "loss": 1.6241, "step": 8538 }, { "epoch": 0.30579977438358374, "grad_norm": 2.0719470977783203, "learning_rate": 0.00016268910256402277, "loss": 1.5669, "step": 8539 }, { "epoch": 0.305835586513152, "grad_norm": 1.9695795774459839, "learning_rate": 0.00016268006527073322, "loss": 1.5724, "step": 8540 }, { "epoch": 0.3058713986427203, "grad_norm": 1.7500296831130981, "learning_rate": 0.00016267102713416417, "loss": 1.3741, "step": 8541 }, { "epoch": 0.30590721077228855, "grad_norm": 1.8860368728637695, "learning_rate": 0.00016266198815443738, "loss": 1.4433, "step": 8542 }, { "epoch": 0.3059430229018569, "grad_norm": 1.7154771089553833, "learning_rate": 0.00016265294833167434, "loss": 1.5552, "step": 8543 }, { "epoch": 0.30597883503142514, "grad_norm": 1.621751070022583, "learning_rate": 0.0001626439076659967, "loss": 1.4277, "step": 8544 }, { "epoch": 0.3060146471609934, "grad_norm": 1.4160710573196411, "learning_rate": 0.00016263486615752606, "loss": 1.2813, "step": 8545 }, { "epoch": 0.30605045929056174, "grad_norm": 1.513929009437561, "learning_rate": 0.00016262582380638407, "loss": 1.4992, "step": 8546 }, { "epoch": 0.30608627142013, "grad_norm": 1.4435629844665527, "learning_rate": 0.00016261678061269244, "loss": 1.5759, "step": 8547 }, { "epoch": 0.3061220835496983, "grad_norm": 1.9000530242919922, "learning_rate": 0.0001626077365765728, "loss": 1.575, "step": 8548 }, { "epoch": 0.30615789567926655, "grad_norm": 1.5278146266937256, "learning_rate": 0.00016259869169814678, "loss": 1.8735, "step": 8549 }, { "epoch": 0.30619370780883487, "grad_norm": 1.8656506538391113, "learning_rate": 0.00016258964597753615, "loss": 1.4926, "step": 8550 }, { "epoch": 0.30622951993840314, "grad_norm": 2.041372060775757, "learning_rate": 0.00016258059941486259, "loss": 1.2913, "step": 8551 }, { "epoch": 0.3062653320679714, "grad_norm": 1.8052326440811157, "learning_rate": 0.00016257155201024776, "loss": 1.5017, "step": 8552 }, { "epoch": 0.30630114419753973, "grad_norm": 2.2449746131896973, "learning_rate": 0.0001625625037638134, "loss": 1.531, "step": 8553 }, { "epoch": 0.306336956327108, "grad_norm": 1.6481692790985107, "learning_rate": 0.00016255345467568126, "loss": 1.5007, "step": 8554 }, { "epoch": 0.30637276845667627, "grad_norm": 2.4112162590026855, "learning_rate": 0.00016254440474597307, "loss": 1.6164, "step": 8555 }, { "epoch": 0.30640858058624454, "grad_norm": 1.6740220785140991, "learning_rate": 0.0001625353539748106, "loss": 1.4353, "step": 8556 }, { "epoch": 0.30644439271581286, "grad_norm": 2.5266458988189697, "learning_rate": 0.00016252630236231557, "loss": 1.3675, "step": 8557 }, { "epoch": 0.30648020484538113, "grad_norm": 1.4388402700424194, "learning_rate": 0.00016251724990860983, "loss": 1.6966, "step": 8558 }, { "epoch": 0.3065160169749494, "grad_norm": 1.701951026916504, "learning_rate": 0.00016250819661381516, "loss": 1.5309, "step": 8559 }, { "epoch": 0.3065518291045177, "grad_norm": 1.3928033113479614, "learning_rate": 0.0001624991424780533, "loss": 1.1926, "step": 8560 }, { "epoch": 0.306587641234086, "grad_norm": 1.6623480319976807, "learning_rate": 0.0001624900875014461, "loss": 1.5179, "step": 8561 }, { "epoch": 0.30662345336365426, "grad_norm": 1.2392915487289429, "learning_rate": 0.0001624810316841154, "loss": 1.6586, "step": 8562 }, { "epoch": 0.30665926549322253, "grad_norm": 1.4448860883712769, "learning_rate": 0.000162471975026183, "loss": 1.3141, "step": 8563 }, { "epoch": 0.30669507762279086, "grad_norm": 1.6523834466934204, "learning_rate": 0.0001624629175277707, "loss": 1.4945, "step": 8564 }, { "epoch": 0.3067308897523591, "grad_norm": 1.7651172876358032, "learning_rate": 0.0001624538591890005, "loss": 1.4052, "step": 8565 }, { "epoch": 0.3067667018819274, "grad_norm": 1.7851136922836304, "learning_rate": 0.00016244480000999416, "loss": 1.2272, "step": 8566 }, { "epoch": 0.3068025140114957, "grad_norm": 1.844192385673523, "learning_rate": 0.0001624357399908736, "loss": 1.9858, "step": 8567 }, { "epoch": 0.306838326141064, "grad_norm": 1.3948429822921753, "learning_rate": 0.00016242667913176064, "loss": 1.6287, "step": 8568 }, { "epoch": 0.30687413827063226, "grad_norm": 2.166774034500122, "learning_rate": 0.00016241761743277726, "loss": 1.682, "step": 8569 }, { "epoch": 0.3069099504002005, "grad_norm": 2.1550753116607666, "learning_rate": 0.00016240855489404535, "loss": 1.3612, "step": 8570 }, { "epoch": 0.30694576252976885, "grad_norm": 1.3740218877792358, "learning_rate": 0.00016239949151568688, "loss": 1.5035, "step": 8571 }, { "epoch": 0.3069815746593371, "grad_norm": 1.7257821559906006, "learning_rate": 0.0001623904272978237, "loss": 1.5221, "step": 8572 }, { "epoch": 0.3070173867889054, "grad_norm": 2.2711541652679443, "learning_rate": 0.00016238136224057777, "loss": 1.6717, "step": 8573 }, { "epoch": 0.3070531989184737, "grad_norm": 1.7106958627700806, "learning_rate": 0.00016237229634407112, "loss": 1.8193, "step": 8574 }, { "epoch": 0.307089011048042, "grad_norm": 1.976996898651123, "learning_rate": 0.0001623632296084257, "loss": 1.6273, "step": 8575 }, { "epoch": 0.30712482317761025, "grad_norm": 2.0148656368255615, "learning_rate": 0.0001623541620337634, "loss": 1.7886, "step": 8576 }, { "epoch": 0.3071606353071785, "grad_norm": 1.4920234680175781, "learning_rate": 0.00016234509362020633, "loss": 1.6572, "step": 8577 }, { "epoch": 0.30719644743674684, "grad_norm": 2.609459638595581, "learning_rate": 0.00016233602436787644, "loss": 1.4282, "step": 8578 }, { "epoch": 0.3072322595663151, "grad_norm": 1.3255739212036133, "learning_rate": 0.00016232695427689575, "loss": 1.5997, "step": 8579 }, { "epoch": 0.3072680716958834, "grad_norm": 2.2817599773406982, "learning_rate": 0.00016231788334738627, "loss": 1.4272, "step": 8580 }, { "epoch": 0.3073038838254517, "grad_norm": 1.5442206859588623, "learning_rate": 0.00016230881157947006, "loss": 1.5387, "step": 8581 }, { "epoch": 0.30733969595502, "grad_norm": 1.3098913431167603, "learning_rate": 0.00016229973897326919, "loss": 1.5564, "step": 8582 }, { "epoch": 0.30737550808458824, "grad_norm": 1.755606770515442, "learning_rate": 0.00016229066552890563, "loss": 1.6957, "step": 8583 }, { "epoch": 0.3074113202141565, "grad_norm": 1.984049916267395, "learning_rate": 0.0001622815912465016, "loss": 1.314, "step": 8584 }, { "epoch": 0.30744713234372484, "grad_norm": 1.4831622838974, "learning_rate": 0.00016227251612617902, "loss": 1.3996, "step": 8585 }, { "epoch": 0.3074829444732931, "grad_norm": 2.159074306488037, "learning_rate": 0.0001622634401680601, "loss": 1.7296, "step": 8586 }, { "epoch": 0.3075187566028614, "grad_norm": 1.5581837892532349, "learning_rate": 0.0001622543633722669, "loss": 1.3766, "step": 8587 }, { "epoch": 0.3075545687324297, "grad_norm": 1.5768377780914307, "learning_rate": 0.00016224528573892153, "loss": 1.5968, "step": 8588 }, { "epoch": 0.30759038086199797, "grad_norm": 1.7396345138549805, "learning_rate": 0.00016223620726814615, "loss": 1.461, "step": 8589 }, { "epoch": 0.30762619299156624, "grad_norm": 2.5793988704681396, "learning_rate": 0.00016222712796006285, "loss": 1.4118, "step": 8590 }, { "epoch": 0.3076620051211345, "grad_norm": 1.5253145694732666, "learning_rate": 0.00016221804781479384, "loss": 1.4673, "step": 8591 }, { "epoch": 0.30769781725070283, "grad_norm": 1.6572800874710083, "learning_rate": 0.00016220896683246126, "loss": 1.187, "step": 8592 }, { "epoch": 0.3077336293802711, "grad_norm": 2.2144687175750732, "learning_rate": 0.00016219988501318727, "loss": 1.6095, "step": 8593 }, { "epoch": 0.30776944150983937, "grad_norm": 2.0595297813415527, "learning_rate": 0.00016219080235709403, "loss": 1.8326, "step": 8594 }, { "epoch": 0.3078052536394077, "grad_norm": 1.579040288925171, "learning_rate": 0.0001621817188643038, "loss": 1.5968, "step": 8595 }, { "epoch": 0.30784106576897596, "grad_norm": 1.8620355129241943, "learning_rate": 0.0001621726345349387, "loss": 1.5707, "step": 8596 }, { "epoch": 0.30787687789854423, "grad_norm": 1.648568868637085, "learning_rate": 0.00016216354936912105, "loss": 1.6395, "step": 8597 }, { "epoch": 0.3079126900281125, "grad_norm": 1.642183542251587, "learning_rate": 0.000162154463366973, "loss": 1.7484, "step": 8598 }, { "epoch": 0.3079485021576808, "grad_norm": 1.5687962770462036, "learning_rate": 0.00016214537652861687, "loss": 1.1456, "step": 8599 }, { "epoch": 0.3079843142872491, "grad_norm": 1.3825608491897583, "learning_rate": 0.00016213628885417483, "loss": 1.4397, "step": 8600 }, { "epoch": 0.30802012641681736, "grad_norm": 1.5594370365142822, "learning_rate": 0.00016212720034376914, "loss": 1.6672, "step": 8601 }, { "epoch": 0.3080559385463857, "grad_norm": 1.4455797672271729, "learning_rate": 0.00016211811099752215, "loss": 1.5133, "step": 8602 }, { "epoch": 0.30809175067595396, "grad_norm": 1.868511438369751, "learning_rate": 0.00016210902081555605, "loss": 1.5547, "step": 8603 }, { "epoch": 0.3081275628055222, "grad_norm": 1.375169277191162, "learning_rate": 0.00016209992979799326, "loss": 1.4245, "step": 8604 }, { "epoch": 0.3081633749350905, "grad_norm": 2.5195486545562744, "learning_rate": 0.00016209083794495598, "loss": 1.76, "step": 8605 }, { "epoch": 0.3081991870646588, "grad_norm": 1.5071332454681396, "learning_rate": 0.00016208174525656656, "loss": 1.2215, "step": 8606 }, { "epoch": 0.3082349991942271, "grad_norm": 1.7664871215820312, "learning_rate": 0.00016207265173294734, "loss": 1.6029, "step": 8607 }, { "epoch": 0.30827081132379536, "grad_norm": 2.763017416000366, "learning_rate": 0.00016206355737422067, "loss": 1.5892, "step": 8608 }, { "epoch": 0.3083066234533637, "grad_norm": 1.5861375331878662, "learning_rate": 0.00016205446218050892, "loss": 1.0838, "step": 8609 }, { "epoch": 0.30834243558293195, "grad_norm": 1.6708229780197144, "learning_rate": 0.00016204536615193439, "loss": 1.7046, "step": 8610 }, { "epoch": 0.3083782477125002, "grad_norm": 1.6125863790512085, "learning_rate": 0.00016203626928861948, "loss": 1.6065, "step": 8611 }, { "epoch": 0.3084140598420685, "grad_norm": 2.4662134647369385, "learning_rate": 0.00016202717159068662, "loss": 1.631, "step": 8612 }, { "epoch": 0.3084498719716368, "grad_norm": 1.8300012350082397, "learning_rate": 0.00016201807305825817, "loss": 1.49, "step": 8613 }, { "epoch": 0.3084856841012051, "grad_norm": 1.6964879035949707, "learning_rate": 0.0001620089736914565, "loss": 1.3984, "step": 8614 }, { "epoch": 0.30852149623077335, "grad_norm": 1.575770616531372, "learning_rate": 0.0001619998734904041, "loss": 1.6827, "step": 8615 }, { "epoch": 0.3085573083603416, "grad_norm": 1.308549165725708, "learning_rate": 0.00016199077245522341, "loss": 1.5704, "step": 8616 }, { "epoch": 0.30859312048990994, "grad_norm": 2.0342214107513428, "learning_rate": 0.00016198167058603682, "loss": 1.6039, "step": 8617 }, { "epoch": 0.3086289326194782, "grad_norm": 1.666513442993164, "learning_rate": 0.0001619725678829668, "loss": 1.5431, "step": 8618 }, { "epoch": 0.3086647447490465, "grad_norm": 1.700683832168579, "learning_rate": 0.0001619634643461358, "loss": 1.6518, "step": 8619 }, { "epoch": 0.3087005568786148, "grad_norm": 2.255946159362793, "learning_rate": 0.00016195435997566632, "loss": 1.7746, "step": 8620 }, { "epoch": 0.3087363690081831, "grad_norm": 1.617912769317627, "learning_rate": 0.00016194525477168087, "loss": 1.6655, "step": 8621 }, { "epoch": 0.30877218113775134, "grad_norm": 2.162280321121216, "learning_rate": 0.0001619361487343019, "loss": 1.5503, "step": 8622 }, { "epoch": 0.3088079932673196, "grad_norm": 1.8481221199035645, "learning_rate": 0.00016192704186365195, "loss": 1.5285, "step": 8623 }, { "epoch": 0.30884380539688794, "grad_norm": 1.9384018182754517, "learning_rate": 0.00016191793415985353, "loss": 1.9594, "step": 8624 }, { "epoch": 0.3088796175264562, "grad_norm": 1.9377477169036865, "learning_rate": 0.00016190882562302914, "loss": 1.5948, "step": 8625 }, { "epoch": 0.3089154296560245, "grad_norm": 1.4632514715194702, "learning_rate": 0.0001618997162533014, "loss": 1.6144, "step": 8626 }, { "epoch": 0.3089512417855928, "grad_norm": 1.6265308856964111, "learning_rate": 0.0001618906060507928, "loss": 1.6253, "step": 8627 }, { "epoch": 0.30898705391516107, "grad_norm": 1.8380078077316284, "learning_rate": 0.00016188149501562596, "loss": 1.287, "step": 8628 }, { "epoch": 0.30902286604472934, "grad_norm": 1.5310187339782715, "learning_rate": 0.00016187238314792338, "loss": 1.3016, "step": 8629 }, { "epoch": 0.3090586781742976, "grad_norm": 1.4253685474395752, "learning_rate": 0.00016186327044780772, "loss": 1.4775, "step": 8630 }, { "epoch": 0.30909449030386593, "grad_norm": 2.2036783695220947, "learning_rate": 0.00016185415691540156, "loss": 1.5592, "step": 8631 }, { "epoch": 0.3091303024334342, "grad_norm": 2.154386281967163, "learning_rate": 0.0001618450425508275, "loss": 1.4297, "step": 8632 }, { "epoch": 0.30916611456300247, "grad_norm": 1.7909826040267944, "learning_rate": 0.00016183592735420817, "loss": 1.3135, "step": 8633 }, { "epoch": 0.3092019266925708, "grad_norm": 1.5986603498458862, "learning_rate": 0.0001618268113256662, "loss": 1.6078, "step": 8634 }, { "epoch": 0.30923773882213906, "grad_norm": 1.9522628784179688, "learning_rate": 0.0001618176944653242, "loss": 1.6146, "step": 8635 }, { "epoch": 0.30927355095170733, "grad_norm": 1.3854929208755493, "learning_rate": 0.0001618085767733049, "loss": 1.551, "step": 8636 }, { "epoch": 0.3093093630812756, "grad_norm": 1.41969895362854, "learning_rate": 0.0001617994582497309, "loss": 1.3639, "step": 8637 }, { "epoch": 0.3093451752108439, "grad_norm": 2.2371199131011963, "learning_rate": 0.00016179033889472493, "loss": 1.5884, "step": 8638 }, { "epoch": 0.3093809873404122, "grad_norm": 1.718151330947876, "learning_rate": 0.00016178121870840965, "loss": 1.5665, "step": 8639 }, { "epoch": 0.30941679946998046, "grad_norm": 1.7268788814544678, "learning_rate": 0.00016177209769090774, "loss": 1.6803, "step": 8640 }, { "epoch": 0.3094526115995488, "grad_norm": 1.3030462265014648, "learning_rate": 0.00016176297584234196, "loss": 1.5642, "step": 8641 }, { "epoch": 0.30948842372911706, "grad_norm": 1.4846490621566772, "learning_rate": 0.00016175385316283502, "loss": 1.4474, "step": 8642 }, { "epoch": 0.3095242358586853, "grad_norm": 1.95109224319458, "learning_rate": 0.00016174472965250965, "loss": 1.6802, "step": 8643 }, { "epoch": 0.3095600479882536, "grad_norm": 2.3213067054748535, "learning_rate": 0.00016173560531148855, "loss": 1.6874, "step": 8644 }, { "epoch": 0.3095958601178219, "grad_norm": 1.6281256675720215, "learning_rate": 0.0001617264801398945, "loss": 1.6978, "step": 8645 }, { "epoch": 0.3096316722473902, "grad_norm": 1.7163335084915161, "learning_rate": 0.0001617173541378503, "loss": 1.507, "step": 8646 }, { "epoch": 0.30966748437695846, "grad_norm": 2.341641426086426, "learning_rate": 0.0001617082273054787, "loss": 1.6396, "step": 8647 }, { "epoch": 0.3097032965065268, "grad_norm": 2.265071392059326, "learning_rate": 0.00016169909964290256, "loss": 1.383, "step": 8648 }, { "epoch": 0.30973910863609505, "grad_norm": 2.135606288909912, "learning_rate": 0.00016168997115024458, "loss": 1.6133, "step": 8649 }, { "epoch": 0.3097749207656633, "grad_norm": 1.6686729192733765, "learning_rate": 0.0001616808418276276, "loss": 1.4704, "step": 8650 }, { "epoch": 0.3098107328952316, "grad_norm": 1.7093509435653687, "learning_rate": 0.00016167171167517447, "loss": 1.6578, "step": 8651 }, { "epoch": 0.3098465450247999, "grad_norm": 1.4961060285568237, "learning_rate": 0.00016166258069300803, "loss": 1.5668, "step": 8652 }, { "epoch": 0.3098823571543682, "grad_norm": 1.3876949548721313, "learning_rate": 0.00016165344888125106, "loss": 1.6351, "step": 8653 }, { "epoch": 0.30991816928393645, "grad_norm": 1.5020776987075806, "learning_rate": 0.00016164431624002647, "loss": 1.4318, "step": 8654 }, { "epoch": 0.3099539814135048, "grad_norm": 1.5066617727279663, "learning_rate": 0.00016163518276945715, "loss": 1.6516, "step": 8655 }, { "epoch": 0.30998979354307304, "grad_norm": 1.377149224281311, "learning_rate": 0.00016162604846966594, "loss": 1.5357, "step": 8656 }, { "epoch": 0.3100256056726413, "grad_norm": 1.648619294166565, "learning_rate": 0.0001616169133407757, "loss": 1.9226, "step": 8657 }, { "epoch": 0.3100614178022096, "grad_norm": 1.3753312826156616, "learning_rate": 0.00016160777738290945, "loss": 1.5631, "step": 8658 }, { "epoch": 0.3100972299317779, "grad_norm": 1.6483653783798218, "learning_rate": 0.00016159864059618997, "loss": 1.762, "step": 8659 }, { "epoch": 0.3101330420613462, "grad_norm": 1.7791167497634888, "learning_rate": 0.00016158950298074022, "loss": 1.575, "step": 8660 }, { "epoch": 0.31016885419091444, "grad_norm": 2.1656835079193115, "learning_rate": 0.00016158036453668318, "loss": 1.6359, "step": 8661 }, { "epoch": 0.31020466632048277, "grad_norm": 1.743486762046814, "learning_rate": 0.00016157122526414176, "loss": 1.8204, "step": 8662 }, { "epoch": 0.31024047845005104, "grad_norm": 1.796557903289795, "learning_rate": 0.00016156208516323895, "loss": 1.3592, "step": 8663 }, { "epoch": 0.3102762905796193, "grad_norm": 1.2790051698684692, "learning_rate": 0.00016155294423409768, "loss": 1.476, "step": 8664 }, { "epoch": 0.3103121027091876, "grad_norm": 1.7429996728897095, "learning_rate": 0.00016154380247684094, "loss": 1.5231, "step": 8665 }, { "epoch": 0.3103479148387559, "grad_norm": 2.2776238918304443, "learning_rate": 0.00016153465989159172, "loss": 1.5724, "step": 8666 }, { "epoch": 0.31038372696832417, "grad_norm": 1.5032392740249634, "learning_rate": 0.00016152551647847304, "loss": 1.7274, "step": 8667 }, { "epoch": 0.31041953909789244, "grad_norm": 1.6818513870239258, "learning_rate": 0.00016151637223760785, "loss": 1.4738, "step": 8668 }, { "epoch": 0.31045535122746076, "grad_norm": 1.457304835319519, "learning_rate": 0.0001615072271691193, "loss": 1.5367, "step": 8669 }, { "epoch": 0.31049116335702903, "grad_norm": 1.749411702156067, "learning_rate": 0.00016149808127313025, "loss": 1.6854, "step": 8670 }, { "epoch": 0.3105269754865973, "grad_norm": 1.698837399482727, "learning_rate": 0.00016148893454976393, "loss": 1.5631, "step": 8671 }, { "epoch": 0.31056278761616557, "grad_norm": 1.9084186553955078, "learning_rate": 0.00016147978699914325, "loss": 1.3116, "step": 8672 }, { "epoch": 0.3105985997457339, "grad_norm": 1.5788426399230957, "learning_rate": 0.00016147063862139138, "loss": 1.5894, "step": 8673 }, { "epoch": 0.31063441187530216, "grad_norm": 1.705234169960022, "learning_rate": 0.00016146148941663136, "loss": 1.4553, "step": 8674 }, { "epoch": 0.31067022400487043, "grad_norm": 1.3608901500701904, "learning_rate": 0.00016145233938498626, "loss": 1.5954, "step": 8675 }, { "epoch": 0.31070603613443876, "grad_norm": 1.920832872390747, "learning_rate": 0.00016144318852657921, "loss": 1.6358, "step": 8676 }, { "epoch": 0.310741848264007, "grad_norm": 1.759215235710144, "learning_rate": 0.00016143403684153328, "loss": 1.6726, "step": 8677 }, { "epoch": 0.3107776603935753, "grad_norm": 1.8007652759552002, "learning_rate": 0.00016142488432997168, "loss": 1.6163, "step": 8678 }, { "epoch": 0.31081347252314356, "grad_norm": 2.198650598526001, "learning_rate": 0.00016141573099201744, "loss": 1.4736, "step": 8679 }, { "epoch": 0.3108492846527119, "grad_norm": 1.4647966623306274, "learning_rate": 0.00016140657682779384, "loss": 1.2301, "step": 8680 }, { "epoch": 0.31088509678228016, "grad_norm": 1.752133846282959, "learning_rate": 0.0001613974218374239, "loss": 1.558, "step": 8681 }, { "epoch": 0.3109209089118484, "grad_norm": 1.8326823711395264, "learning_rate": 0.00016138826602103085, "loss": 1.8069, "step": 8682 }, { "epoch": 0.31095672104141675, "grad_norm": 1.3511853218078613, "learning_rate": 0.00016137910937873788, "loss": 1.5138, "step": 8683 }, { "epoch": 0.310992533170985, "grad_norm": 1.8145192861557007, "learning_rate": 0.00016136995191066818, "loss": 1.7591, "step": 8684 }, { "epoch": 0.3110283453005533, "grad_norm": 1.5319371223449707, "learning_rate": 0.0001613607936169449, "loss": 1.8161, "step": 8685 }, { "epoch": 0.31106415743012156, "grad_norm": 1.9978666305541992, "learning_rate": 0.00016135163449769132, "loss": 1.4615, "step": 8686 }, { "epoch": 0.3110999695596899, "grad_norm": 2.378784418106079, "learning_rate": 0.0001613424745530306, "loss": 1.5786, "step": 8687 }, { "epoch": 0.31113578168925815, "grad_norm": 2.055859327316284, "learning_rate": 0.00016133331378308604, "loss": 1.6159, "step": 8688 }, { "epoch": 0.3111715938188264, "grad_norm": 1.796878695487976, "learning_rate": 0.00016132415218798085, "loss": 1.6153, "step": 8689 }, { "epoch": 0.31120740594839474, "grad_norm": 1.5123406648635864, "learning_rate": 0.0001613149897678383, "loss": 1.556, "step": 8690 }, { "epoch": 0.311243218077963, "grad_norm": 2.314534902572632, "learning_rate": 0.00016130582652278163, "loss": 1.4182, "step": 8691 }, { "epoch": 0.3112790302075313, "grad_norm": 1.2278296947479248, "learning_rate": 0.0001612966624529342, "loss": 1.6972, "step": 8692 }, { "epoch": 0.31131484233709955, "grad_norm": 1.6352144479751587, "learning_rate": 0.0001612874975584192, "loss": 1.4284, "step": 8693 }, { "epoch": 0.3113506544666679, "grad_norm": 1.5539416074752808, "learning_rate": 0.00016127833183936, "loss": 1.524, "step": 8694 }, { "epoch": 0.31138646659623614, "grad_norm": 2.1352217197418213, "learning_rate": 0.00016126916529587987, "loss": 1.5975, "step": 8695 }, { "epoch": 0.3114222787258044, "grad_norm": 1.6609954833984375, "learning_rate": 0.00016125999792810213, "loss": 1.26, "step": 8696 }, { "epoch": 0.31145809085537274, "grad_norm": 1.37396240234375, "learning_rate": 0.00016125082973615017, "loss": 1.2051, "step": 8697 }, { "epoch": 0.311493902984941, "grad_norm": 2.057098865509033, "learning_rate": 0.00016124166072014728, "loss": 1.5141, "step": 8698 }, { "epoch": 0.3115297151145093, "grad_norm": 1.9792077541351318, "learning_rate": 0.00016123249088021688, "loss": 1.6889, "step": 8699 }, { "epoch": 0.31156552724407754, "grad_norm": 1.6447113752365112, "learning_rate": 0.00016122332021648226, "loss": 1.4876, "step": 8700 }, { "epoch": 0.31160133937364587, "grad_norm": 1.9308607578277588, "learning_rate": 0.00016121414872906687, "loss": 1.4819, "step": 8701 }, { "epoch": 0.31163715150321414, "grad_norm": 1.500744342803955, "learning_rate": 0.00016120497641809408, "loss": 1.5922, "step": 8702 }, { "epoch": 0.3116729636327824, "grad_norm": 2.0981481075286865, "learning_rate": 0.00016119580328368725, "loss": 1.7535, "step": 8703 }, { "epoch": 0.31170877576235073, "grad_norm": 1.7200511693954468, "learning_rate": 0.0001611866293259698, "loss": 1.3421, "step": 8704 }, { "epoch": 0.311744587891919, "grad_norm": 1.9420268535614014, "learning_rate": 0.00016117745454506522, "loss": 1.446, "step": 8705 }, { "epoch": 0.31178040002148727, "grad_norm": 2.217729091644287, "learning_rate": 0.00016116827894109686, "loss": 1.2806, "step": 8706 }, { "epoch": 0.31181621215105554, "grad_norm": 1.7282236814498901, "learning_rate": 0.00016115910251418827, "loss": 1.5973, "step": 8707 }, { "epoch": 0.31185202428062386, "grad_norm": 1.4423658847808838, "learning_rate": 0.0001611499252644628, "loss": 1.5761, "step": 8708 }, { "epoch": 0.31188783641019213, "grad_norm": 1.5264742374420166, "learning_rate": 0.00016114074719204396, "loss": 1.5954, "step": 8709 }, { "epoch": 0.3119236485397604, "grad_norm": 2.1491074562072754, "learning_rate": 0.00016113156829705526, "loss": 1.5177, "step": 8710 }, { "epoch": 0.3119594606693287, "grad_norm": 1.6613092422485352, "learning_rate": 0.00016112238857962017, "loss": 1.6098, "step": 8711 }, { "epoch": 0.311995272798897, "grad_norm": 1.7055860757827759, "learning_rate": 0.00016111320803986217, "loss": 1.7401, "step": 8712 }, { "epoch": 0.31203108492846526, "grad_norm": 1.538976788520813, "learning_rate": 0.00016110402667790475, "loss": 1.7231, "step": 8713 }, { "epoch": 0.31206689705803353, "grad_norm": 2.240173816680908, "learning_rate": 0.0001610948444938715, "loss": 1.8553, "step": 8714 }, { "epoch": 0.31210270918760186, "grad_norm": 1.49239182472229, "learning_rate": 0.00016108566148788594, "loss": 1.2735, "step": 8715 }, { "epoch": 0.3121385213171701, "grad_norm": 2.0355594158172607, "learning_rate": 0.00016107647766007159, "loss": 1.5694, "step": 8716 }, { "epoch": 0.3121743334467384, "grad_norm": 2.6552181243896484, "learning_rate": 0.000161067293010552, "loss": 1.4701, "step": 8717 }, { "epoch": 0.3122101455763067, "grad_norm": 1.752543568611145, "learning_rate": 0.00016105810753945076, "loss": 1.6328, "step": 8718 }, { "epoch": 0.312245957705875, "grad_norm": 1.854783296585083, "learning_rate": 0.00016104892124689147, "loss": 1.723, "step": 8719 }, { "epoch": 0.31228176983544326, "grad_norm": 1.347758412361145, "learning_rate": 0.00016103973413299767, "loss": 1.4929, "step": 8720 }, { "epoch": 0.3123175819650115, "grad_norm": 1.979087471961975, "learning_rate": 0.00016103054619789298, "loss": 1.639, "step": 8721 }, { "epoch": 0.31235339409457985, "grad_norm": 2.2528393268585205, "learning_rate": 0.00016102135744170098, "loss": 1.7851, "step": 8722 }, { "epoch": 0.3123892062241481, "grad_norm": 2.4195220470428467, "learning_rate": 0.00016101216786454538, "loss": 1.6844, "step": 8723 }, { "epoch": 0.3124250183537164, "grad_norm": 2.5333340167999268, "learning_rate": 0.00016100297746654975, "loss": 1.7123, "step": 8724 }, { "epoch": 0.3124608304832847, "grad_norm": 1.6024668216705322, "learning_rate": 0.00016099378624783773, "loss": 1.7999, "step": 8725 }, { "epoch": 0.312496642612853, "grad_norm": 1.7024332284927368, "learning_rate": 0.00016098459420853302, "loss": 1.881, "step": 8726 }, { "epoch": 0.31253245474242125, "grad_norm": 2.0940849781036377, "learning_rate": 0.00016097540134875924, "loss": 1.5664, "step": 8727 }, { "epoch": 0.3125682668719895, "grad_norm": 1.422271728515625, "learning_rate": 0.00016096620766864011, "loss": 1.5992, "step": 8728 }, { "epoch": 0.31260407900155784, "grad_norm": 2.1082029342651367, "learning_rate": 0.00016095701316829925, "loss": 1.2487, "step": 8729 }, { "epoch": 0.3126398911311261, "grad_norm": 1.6328003406524658, "learning_rate": 0.00016094781784786044, "loss": 1.5948, "step": 8730 }, { "epoch": 0.3126757032606944, "grad_norm": 1.901384949684143, "learning_rate": 0.00016093862170744733, "loss": 1.5415, "step": 8731 }, { "epoch": 0.3127115153902627, "grad_norm": 1.7360332012176514, "learning_rate": 0.00016092942474718372, "loss": 1.7264, "step": 8732 }, { "epoch": 0.312747327519831, "grad_norm": 1.5709059238433838, "learning_rate": 0.00016092022696719327, "loss": 1.798, "step": 8733 }, { "epoch": 0.31278313964939924, "grad_norm": 1.9449841976165771, "learning_rate": 0.00016091102836759974, "loss": 1.915, "step": 8734 }, { "epoch": 0.3128189517789675, "grad_norm": 1.2012020349502563, "learning_rate": 0.00016090182894852687, "loss": 1.1363, "step": 8735 }, { "epoch": 0.31285476390853584, "grad_norm": 2.4898271560668945, "learning_rate": 0.0001608926287100985, "loss": 1.7112, "step": 8736 }, { "epoch": 0.3128905760381041, "grad_norm": 2.0908682346343994, "learning_rate": 0.00016088342765243832, "loss": 1.8249, "step": 8737 }, { "epoch": 0.3129263881676724, "grad_norm": 2.0061733722686768, "learning_rate": 0.00016087422577567016, "loss": 1.538, "step": 8738 }, { "epoch": 0.3129622002972407, "grad_norm": 1.3970668315887451, "learning_rate": 0.00016086502307991783, "loss": 1.4419, "step": 8739 }, { "epoch": 0.31299801242680897, "grad_norm": 1.6271003484725952, "learning_rate": 0.0001608558195653051, "loss": 1.2525, "step": 8740 }, { "epoch": 0.31303382455637724, "grad_norm": 1.6746578216552734, "learning_rate": 0.00016084661523195585, "loss": 1.7385, "step": 8741 }, { "epoch": 0.3130696366859455, "grad_norm": 2.0202746391296387, "learning_rate": 0.00016083741007999388, "loss": 1.5669, "step": 8742 }, { "epoch": 0.31310544881551383, "grad_norm": 1.568692684173584, "learning_rate": 0.00016082820410954297, "loss": 1.6994, "step": 8743 }, { "epoch": 0.3131412609450821, "grad_norm": 1.844053030014038, "learning_rate": 0.0001608189973207271, "loss": 1.8135, "step": 8744 }, { "epoch": 0.31317707307465037, "grad_norm": 3.1667978763580322, "learning_rate": 0.00016080978971367004, "loss": 1.8685, "step": 8745 }, { "epoch": 0.3132128852042187, "grad_norm": 2.173363447189331, "learning_rate": 0.00016080058128849572, "loss": 1.4853, "step": 8746 }, { "epoch": 0.31324869733378696, "grad_norm": 1.3892388343811035, "learning_rate": 0.000160791372045328, "loss": 1.3848, "step": 8747 }, { "epoch": 0.31328450946335523, "grad_norm": 1.9529330730438232, "learning_rate": 0.00016078216198429077, "loss": 1.4344, "step": 8748 }, { "epoch": 0.3133203215929235, "grad_norm": 1.5327616930007935, "learning_rate": 0.00016077295110550796, "loss": 1.4037, "step": 8749 }, { "epoch": 0.3133561337224918, "grad_norm": 1.5518343448638916, "learning_rate": 0.0001607637394091035, "loss": 1.4539, "step": 8750 }, { "epoch": 0.3133919458520601, "grad_norm": 1.8877317905426025, "learning_rate": 0.00016075452689520128, "loss": 1.7284, "step": 8751 }, { "epoch": 0.31342775798162836, "grad_norm": 2.0070106983184814, "learning_rate": 0.0001607453135639253, "loss": 1.7028, "step": 8752 }, { "epoch": 0.3134635701111967, "grad_norm": 1.7277096509933472, "learning_rate": 0.00016073609941539944, "loss": 1.3793, "step": 8753 }, { "epoch": 0.31349938224076496, "grad_norm": 2.4877145290374756, "learning_rate": 0.00016072688444974774, "loss": 1.5569, "step": 8754 }, { "epoch": 0.3135351943703332, "grad_norm": 1.5935946702957153, "learning_rate": 0.00016071766866709413, "loss": 1.3458, "step": 8755 }, { "epoch": 0.3135710064999015, "grad_norm": 1.6100068092346191, "learning_rate": 0.00016070845206756263, "loss": 1.7064, "step": 8756 }, { "epoch": 0.3136068186294698, "grad_norm": 1.5823991298675537, "learning_rate": 0.00016069923465127718, "loss": 1.6565, "step": 8757 }, { "epoch": 0.3136426307590381, "grad_norm": 1.5896168947219849, "learning_rate": 0.00016069001641836182, "loss": 1.2576, "step": 8758 }, { "epoch": 0.31367844288860636, "grad_norm": 1.5499836206436157, "learning_rate": 0.00016068079736894058, "loss": 1.642, "step": 8759 }, { "epoch": 0.3137142550181747, "grad_norm": 1.5305835008621216, "learning_rate": 0.00016067157750313752, "loss": 1.3835, "step": 8760 }, { "epoch": 0.31375006714774295, "grad_norm": 1.4311829805374146, "learning_rate": 0.00016066235682107662, "loss": 1.6501, "step": 8761 }, { "epoch": 0.3137858792773112, "grad_norm": 1.4986733198165894, "learning_rate": 0.00016065313532288196, "loss": 1.44, "step": 8762 }, { "epoch": 0.3138216914068795, "grad_norm": 1.811113953590393, "learning_rate": 0.0001606439130086776, "loss": 1.341, "step": 8763 }, { "epoch": 0.3138575035364478, "grad_norm": 1.6800071001052856, "learning_rate": 0.00016063468987858763, "loss": 1.6177, "step": 8764 }, { "epoch": 0.3138933156660161, "grad_norm": 1.6395264863967896, "learning_rate": 0.00016062546593273612, "loss": 1.3695, "step": 8765 }, { "epoch": 0.31392912779558435, "grad_norm": 1.7987147569656372, "learning_rate": 0.00016061624117124715, "loss": 1.634, "step": 8766 }, { "epoch": 0.3139649399251527, "grad_norm": 1.6628080606460571, "learning_rate": 0.00016060701559424484, "loss": 1.509, "step": 8767 }, { "epoch": 0.31400075205472094, "grad_norm": 1.6090434789657593, "learning_rate": 0.00016059778920185332, "loss": 1.3413, "step": 8768 }, { "epoch": 0.3140365641842892, "grad_norm": 1.616045355796814, "learning_rate": 0.0001605885619941967, "loss": 1.6345, "step": 8769 }, { "epoch": 0.3140723763138575, "grad_norm": 2.1155946254730225, "learning_rate": 0.00016057933397139914, "loss": 1.4609, "step": 8770 }, { "epoch": 0.3141081884434258, "grad_norm": 1.705063819885254, "learning_rate": 0.0001605701051335848, "loss": 1.677, "step": 8771 }, { "epoch": 0.3141440005729941, "grad_norm": 1.82172691822052, "learning_rate": 0.00016056087548087784, "loss": 1.5523, "step": 8772 }, { "epoch": 0.31417981270256234, "grad_norm": 1.3792582750320435, "learning_rate": 0.00016055164501340235, "loss": 1.5636, "step": 8773 }, { "epoch": 0.31421562483213067, "grad_norm": 1.869604468345642, "learning_rate": 0.00016054241373128264, "loss": 1.7203, "step": 8774 }, { "epoch": 0.31425143696169894, "grad_norm": 1.405261754989624, "learning_rate": 0.00016053318163464285, "loss": 1.5212, "step": 8775 }, { "epoch": 0.3142872490912672, "grad_norm": 1.8906536102294922, "learning_rate": 0.00016052394872360717, "loss": 1.6859, "step": 8776 }, { "epoch": 0.3143230612208355, "grad_norm": 1.6787978410720825, "learning_rate": 0.00016051471499829983, "loss": 1.5595, "step": 8777 }, { "epoch": 0.3143588733504038, "grad_norm": 1.543586254119873, "learning_rate": 0.00016050548045884501, "loss": 1.5273, "step": 8778 }, { "epoch": 0.31439468547997207, "grad_norm": 1.6794708967208862, "learning_rate": 0.00016049624510536704, "loss": 1.7836, "step": 8779 }, { "epoch": 0.31443049760954034, "grad_norm": 1.616531252861023, "learning_rate": 0.00016048700893799014, "loss": 1.3527, "step": 8780 }, { "epoch": 0.31446630973910866, "grad_norm": 1.3275195360183716, "learning_rate": 0.00016047777195683858, "loss": 1.3845, "step": 8781 }, { "epoch": 0.31450212186867693, "grad_norm": 2.247191905975342, "learning_rate": 0.00016046853416203655, "loss": 1.394, "step": 8782 }, { "epoch": 0.3145379339982452, "grad_norm": 1.556274175643921, "learning_rate": 0.0001604592955537084, "loss": 1.7258, "step": 8783 }, { "epoch": 0.31457374612781347, "grad_norm": 1.2704604864120483, "learning_rate": 0.00016045005613197843, "loss": 1.2194, "step": 8784 }, { "epoch": 0.3146095582573818, "grad_norm": 2.084599494934082, "learning_rate": 0.00016044081589697092, "loss": 1.7761, "step": 8785 }, { "epoch": 0.31464537038695006, "grad_norm": 1.5749155282974243, "learning_rate": 0.0001604315748488102, "loss": 1.4076, "step": 8786 }, { "epoch": 0.31468118251651833, "grad_norm": 1.5411909818649292, "learning_rate": 0.00016042233298762062, "loss": 1.5305, "step": 8787 }, { "epoch": 0.31471699464608666, "grad_norm": 1.7500187158584595, "learning_rate": 0.00016041309031352644, "loss": 1.3224, "step": 8788 }, { "epoch": 0.3147528067756549, "grad_norm": 1.9515713453292847, "learning_rate": 0.00016040384682665214, "loss": 1.5637, "step": 8789 }, { "epoch": 0.3147886189052232, "grad_norm": 1.4233375787734985, "learning_rate": 0.0001603946025271219, "loss": 1.6988, "step": 8790 }, { "epoch": 0.31482443103479146, "grad_norm": 1.8382158279418945, "learning_rate": 0.00016038535741506025, "loss": 1.5435, "step": 8791 }, { "epoch": 0.3148602431643598, "grad_norm": 1.6217607259750366, "learning_rate": 0.00016037611149059147, "loss": 1.4189, "step": 8792 }, { "epoch": 0.31489605529392806, "grad_norm": 1.7804478406906128, "learning_rate": 0.00016036686475384002, "loss": 1.3143, "step": 8793 }, { "epoch": 0.3149318674234963, "grad_norm": 1.3662558794021606, "learning_rate": 0.00016035761720493023, "loss": 1.4369, "step": 8794 }, { "epoch": 0.31496767955306465, "grad_norm": 1.6197288036346436, "learning_rate": 0.0001603483688439866, "loss": 1.4308, "step": 8795 }, { "epoch": 0.3150034916826329, "grad_norm": 2.1956467628479004, "learning_rate": 0.00016033911967113347, "loss": 1.4458, "step": 8796 }, { "epoch": 0.3150393038122012, "grad_norm": 1.8377485275268555, "learning_rate": 0.00016032986968649536, "loss": 1.6501, "step": 8797 }, { "epoch": 0.31507511594176946, "grad_norm": 2.408724546432495, "learning_rate": 0.00016032061889019662, "loss": 1.2958, "step": 8798 }, { "epoch": 0.3151109280713378, "grad_norm": 1.6144731044769287, "learning_rate": 0.00016031136728236184, "loss": 1.5546, "step": 8799 }, { "epoch": 0.31514674020090605, "grad_norm": 1.4665641784667969, "learning_rate": 0.00016030211486311533, "loss": 1.4769, "step": 8800 }, { "epoch": 0.3151825523304743, "grad_norm": 1.562260389328003, "learning_rate": 0.0001602928616325817, "loss": 1.8916, "step": 8801 }, { "epoch": 0.31521836446004264, "grad_norm": 1.9326006174087524, "learning_rate": 0.00016028360759088534, "loss": 1.6079, "step": 8802 }, { "epoch": 0.3152541765896109, "grad_norm": 1.7705698013305664, "learning_rate": 0.00016027435273815085, "loss": 1.4926, "step": 8803 }, { "epoch": 0.3152899887191792, "grad_norm": 1.473935842514038, "learning_rate": 0.00016026509707450266, "loss": 1.379, "step": 8804 }, { "epoch": 0.31532580084874745, "grad_norm": 3.5197129249572754, "learning_rate": 0.00016025584060006532, "loss": 1.5021, "step": 8805 }, { "epoch": 0.3153616129783158, "grad_norm": 1.7519930601119995, "learning_rate": 0.0001602465833149634, "loss": 1.5798, "step": 8806 }, { "epoch": 0.31539742510788404, "grad_norm": 1.7574677467346191, "learning_rate": 0.0001602373252193214, "loss": 1.5769, "step": 8807 }, { "epoch": 0.3154332372374523, "grad_norm": 1.7921092510223389, "learning_rate": 0.0001602280663132639, "loss": 1.31, "step": 8808 }, { "epoch": 0.31546904936702064, "grad_norm": 2.8339967727661133, "learning_rate": 0.00016021880659691546, "loss": 1.3994, "step": 8809 }, { "epoch": 0.3155048614965889, "grad_norm": 1.3064004182815552, "learning_rate": 0.00016020954607040065, "loss": 1.5455, "step": 8810 }, { "epoch": 0.3155406736261572, "grad_norm": 1.6051712036132812, "learning_rate": 0.00016020028473384402, "loss": 1.7627, "step": 8811 }, { "epoch": 0.31557648575572544, "grad_norm": 2.130141019821167, "learning_rate": 0.00016019102258737027, "loss": 1.3654, "step": 8812 }, { "epoch": 0.31561229788529377, "grad_norm": 1.2903608083724976, "learning_rate": 0.00016018175963110389, "loss": 1.4281, "step": 8813 }, { "epoch": 0.31564811001486204, "grad_norm": 2.988527536392212, "learning_rate": 0.00016017249586516963, "loss": 1.5691, "step": 8814 }, { "epoch": 0.3156839221444303, "grad_norm": 1.7782341241836548, "learning_rate": 0.000160163231289692, "loss": 1.6, "step": 8815 }, { "epoch": 0.3157197342739986, "grad_norm": 2.2343456745147705, "learning_rate": 0.00016015396590479575, "loss": 1.5319, "step": 8816 }, { "epoch": 0.3157555464035669, "grad_norm": 1.5296396017074585, "learning_rate": 0.00016014469971060543, "loss": 1.5057, "step": 8817 }, { "epoch": 0.31579135853313517, "grad_norm": 1.5707215070724487, "learning_rate": 0.0001601354327072458, "loss": 1.4725, "step": 8818 }, { "epoch": 0.31582717066270344, "grad_norm": 1.7088760137557983, "learning_rate": 0.00016012616489484148, "loss": 1.4094, "step": 8819 }, { "epoch": 0.31586298279227176, "grad_norm": 1.6975724697113037, "learning_rate": 0.00016011689627351712, "loss": 1.4261, "step": 8820 }, { "epoch": 0.31589879492184003, "grad_norm": 2.539350986480713, "learning_rate": 0.00016010762684339752, "loss": 1.4377, "step": 8821 }, { "epoch": 0.3159346070514083, "grad_norm": 1.2720330953598022, "learning_rate": 0.00016009835660460732, "loss": 1.4547, "step": 8822 }, { "epoch": 0.31597041918097657, "grad_norm": 1.6770910024642944, "learning_rate": 0.00016008908555727123, "loss": 1.469, "step": 8823 }, { "epoch": 0.3160062313105449, "grad_norm": 2.0010297298431396, "learning_rate": 0.00016007981370151406, "loss": 1.5004, "step": 8824 }, { "epoch": 0.31604204344011316, "grad_norm": 1.965331792831421, "learning_rate": 0.00016007054103746047, "loss": 1.59, "step": 8825 }, { "epoch": 0.31607785556968143, "grad_norm": 1.8841441869735718, "learning_rate": 0.00016006126756523524, "loss": 1.6099, "step": 8826 }, { "epoch": 0.31611366769924976, "grad_norm": 1.748795986175537, "learning_rate": 0.0001600519932849631, "loss": 1.3553, "step": 8827 }, { "epoch": 0.316149479828818, "grad_norm": 1.5516470670700073, "learning_rate": 0.00016004271819676887, "loss": 1.6262, "step": 8828 }, { "epoch": 0.3161852919583863, "grad_norm": 1.339726209640503, "learning_rate": 0.0001600334423007773, "loss": 1.4469, "step": 8829 }, { "epoch": 0.31622110408795456, "grad_norm": 1.936703085899353, "learning_rate": 0.0001600241655971132, "loss": 1.5799, "step": 8830 }, { "epoch": 0.3162569162175229, "grad_norm": 2.148743152618408, "learning_rate": 0.0001600148880859014, "loss": 1.5546, "step": 8831 }, { "epoch": 0.31629272834709116, "grad_norm": 2.121833562850952, "learning_rate": 0.0001600056097672667, "loss": 1.7325, "step": 8832 }, { "epoch": 0.3163285404766594, "grad_norm": 1.56114661693573, "learning_rate": 0.00015999633064133392, "loss": 1.3962, "step": 8833 }, { "epoch": 0.31636435260622775, "grad_norm": 1.717720866203308, "learning_rate": 0.0001599870507082279, "loss": 1.6272, "step": 8834 }, { "epoch": 0.316400164735796, "grad_norm": 1.254668116569519, "learning_rate": 0.0001599777699680735, "loss": 1.547, "step": 8835 }, { "epoch": 0.3164359768653643, "grad_norm": 1.9581760168075562, "learning_rate": 0.0001599684884209955, "loss": 1.5793, "step": 8836 }, { "epoch": 0.31647178899493256, "grad_norm": 1.534761905670166, "learning_rate": 0.00015995920606711893, "loss": 1.2339, "step": 8837 }, { "epoch": 0.3165076011245009, "grad_norm": 1.87445867061615, "learning_rate": 0.00015994992290656855, "loss": 1.6946, "step": 8838 }, { "epoch": 0.31654341325406915, "grad_norm": 1.2772846221923828, "learning_rate": 0.00015994063893946928, "loss": 1.609, "step": 8839 }, { "epoch": 0.3165792253836374, "grad_norm": 1.4138472080230713, "learning_rate": 0.000159931354165946, "loss": 1.6902, "step": 8840 }, { "epoch": 0.31661503751320574, "grad_norm": 1.8769257068634033, "learning_rate": 0.0001599220685861237, "loss": 1.3907, "step": 8841 }, { "epoch": 0.316650849642774, "grad_norm": 1.4753496646881104, "learning_rate": 0.00015991278220012727, "loss": 1.6795, "step": 8842 }, { "epoch": 0.3166866617723423, "grad_norm": 2.22638201713562, "learning_rate": 0.00015990349500808162, "loss": 1.538, "step": 8843 }, { "epoch": 0.31672247390191055, "grad_norm": 1.6586682796478271, "learning_rate": 0.00015989420701011171, "loss": 1.4434, "step": 8844 }, { "epoch": 0.3167582860314789, "grad_norm": 1.5222989320755005, "learning_rate": 0.0001598849182063425, "loss": 1.4534, "step": 8845 }, { "epoch": 0.31679409816104714, "grad_norm": 1.7269152402877808, "learning_rate": 0.00015987562859689898, "loss": 1.5415, "step": 8846 }, { "epoch": 0.3168299102906154, "grad_norm": 1.076930284500122, "learning_rate": 0.0001598663381819061, "loss": 1.4976, "step": 8847 }, { "epoch": 0.31686572242018374, "grad_norm": 1.478829026222229, "learning_rate": 0.00015985704696148885, "loss": 1.8695, "step": 8848 }, { "epoch": 0.316901534549752, "grad_norm": 1.7606927156448364, "learning_rate": 0.00015984775493577225, "loss": 1.5552, "step": 8849 }, { "epoch": 0.3169373466793203, "grad_norm": 1.9603705406188965, "learning_rate": 0.0001598384621048813, "loss": 1.4099, "step": 8850 }, { "epoch": 0.31697315880888854, "grad_norm": 1.4589382410049438, "learning_rate": 0.00015982916846894106, "loss": 1.4863, "step": 8851 }, { "epoch": 0.31700897093845687, "grad_norm": 1.6379142999649048, "learning_rate": 0.0001598198740280765, "loss": 1.6302, "step": 8852 }, { "epoch": 0.31704478306802514, "grad_norm": 1.3787086009979248, "learning_rate": 0.00015981057878241273, "loss": 1.2704, "step": 8853 }, { "epoch": 0.3170805951975934, "grad_norm": 1.5794938802719116, "learning_rate": 0.00015980128273207473, "loss": 1.6077, "step": 8854 }, { "epoch": 0.31711640732716173, "grad_norm": 1.6516194343566895, "learning_rate": 0.00015979198587718764, "loss": 1.4949, "step": 8855 }, { "epoch": 0.31715221945673, "grad_norm": 1.768387794494629, "learning_rate": 0.00015978268821787648, "loss": 2.0093, "step": 8856 }, { "epoch": 0.31718803158629827, "grad_norm": 1.8336304426193237, "learning_rate": 0.0001597733897542664, "loss": 1.8041, "step": 8857 }, { "epoch": 0.31722384371586654, "grad_norm": 1.474213719367981, "learning_rate": 0.0001597640904864824, "loss": 1.4373, "step": 8858 }, { "epoch": 0.31725965584543486, "grad_norm": 2.2122840881347656, "learning_rate": 0.00015975479041464974, "loss": 1.4047, "step": 8859 }, { "epoch": 0.31729546797500313, "grad_norm": 1.903743028640747, "learning_rate": 0.0001597454895388934, "loss": 1.3211, "step": 8860 }, { "epoch": 0.3173312801045714, "grad_norm": 1.590577483177185, "learning_rate": 0.00015973618785933858, "loss": 1.6325, "step": 8861 }, { "epoch": 0.3173670922341397, "grad_norm": 1.5541341304779053, "learning_rate": 0.00015972688537611038, "loss": 1.5876, "step": 8862 }, { "epoch": 0.317402904363708, "grad_norm": 1.4802968502044678, "learning_rate": 0.000159717582089334, "loss": 1.5479, "step": 8863 }, { "epoch": 0.31743871649327626, "grad_norm": 1.7659330368041992, "learning_rate": 0.0001597082779991346, "loss": 1.6208, "step": 8864 }, { "epoch": 0.31747452862284453, "grad_norm": 1.7611559629440308, "learning_rate": 0.0001596989731056373, "loss": 1.5296, "step": 8865 }, { "epoch": 0.31751034075241286, "grad_norm": 1.8107753992080688, "learning_rate": 0.00015968966740896736, "loss": 1.704, "step": 8866 }, { "epoch": 0.3175461528819811, "grad_norm": 1.5399701595306396, "learning_rate": 0.0001596803609092499, "loss": 1.4575, "step": 8867 }, { "epoch": 0.3175819650115494, "grad_norm": 1.4436416625976562, "learning_rate": 0.0001596710536066102, "loss": 1.1832, "step": 8868 }, { "epoch": 0.3176177771411177, "grad_norm": 1.613898754119873, "learning_rate": 0.00015966174550117342, "loss": 1.6416, "step": 8869 }, { "epoch": 0.317653589270686, "grad_norm": 1.4900312423706055, "learning_rate": 0.00015965243659306482, "loss": 1.4502, "step": 8870 }, { "epoch": 0.31768940140025426, "grad_norm": 1.5705586671829224, "learning_rate": 0.00015964312688240967, "loss": 1.8653, "step": 8871 }, { "epoch": 0.3177252135298225, "grad_norm": 2.103754997253418, "learning_rate": 0.00015963381636933312, "loss": 1.2867, "step": 8872 }, { "epoch": 0.31776102565939085, "grad_norm": 1.2797647714614868, "learning_rate": 0.00015962450505396051, "loss": 1.2829, "step": 8873 }, { "epoch": 0.3177968377889591, "grad_norm": 1.5395480394363403, "learning_rate": 0.00015961519293641714, "loss": 1.4187, "step": 8874 }, { "epoch": 0.3178326499185274, "grad_norm": 1.9138820171356201, "learning_rate": 0.0001596058800168282, "loss": 1.371, "step": 8875 }, { "epoch": 0.3178684620480957, "grad_norm": 1.351996898651123, "learning_rate": 0.00015959656629531904, "loss": 1.3799, "step": 8876 }, { "epoch": 0.317904274177664, "grad_norm": 1.498100757598877, "learning_rate": 0.00015958725177201495, "loss": 1.3474, "step": 8877 }, { "epoch": 0.31794008630723225, "grad_norm": 1.4798411130905151, "learning_rate": 0.0001595779364470413, "loss": 1.548, "step": 8878 }, { "epoch": 0.3179758984368005, "grad_norm": 1.4499621391296387, "learning_rate": 0.0001595686203205233, "loss": 1.7545, "step": 8879 }, { "epoch": 0.31801171056636884, "grad_norm": 1.3401906490325928, "learning_rate": 0.00015955930339258634, "loss": 1.5813, "step": 8880 }, { "epoch": 0.3180475226959371, "grad_norm": 1.535853624343872, "learning_rate": 0.00015954998566335583, "loss": 1.8776, "step": 8881 }, { "epoch": 0.3180833348255054, "grad_norm": 1.4423049688339233, "learning_rate": 0.00015954066713295707, "loss": 1.468, "step": 8882 }, { "epoch": 0.3181191469550737, "grad_norm": 1.5601565837860107, "learning_rate": 0.00015953134780151543, "loss": 1.6817, "step": 8883 }, { "epoch": 0.318154959084642, "grad_norm": 1.770087480545044, "learning_rate": 0.00015952202766915627, "loss": 1.8615, "step": 8884 }, { "epoch": 0.31819077121421024, "grad_norm": 1.8110880851745605, "learning_rate": 0.00015951270673600503, "loss": 1.1239, "step": 8885 }, { "epoch": 0.3182265833437785, "grad_norm": 1.4439191818237305, "learning_rate": 0.0001595033850021871, "loss": 1.377, "step": 8886 }, { "epoch": 0.31826239547334684, "grad_norm": 2.1991729736328125, "learning_rate": 0.00015949406246782785, "loss": 1.3046, "step": 8887 }, { "epoch": 0.3182982076029151, "grad_norm": 1.6699777841567993, "learning_rate": 0.00015948473913305274, "loss": 1.4169, "step": 8888 }, { "epoch": 0.3183340197324834, "grad_norm": 1.6651912927627563, "learning_rate": 0.00015947541499798721, "loss": 1.6165, "step": 8889 }, { "epoch": 0.3183698318620517, "grad_norm": 1.996559500694275, "learning_rate": 0.00015946609006275666, "loss": 1.3417, "step": 8890 }, { "epoch": 0.31840564399161997, "grad_norm": 1.8512824773788452, "learning_rate": 0.0001594567643274866, "loss": 1.5618, "step": 8891 }, { "epoch": 0.31844145612118824, "grad_norm": 2.2073004245758057, "learning_rate": 0.00015944743779230244, "loss": 1.4179, "step": 8892 }, { "epoch": 0.3184772682507565, "grad_norm": 1.4508863687515259, "learning_rate": 0.00015943811045732973, "loss": 1.4505, "step": 8893 }, { "epoch": 0.31851308038032483, "grad_norm": 2.824732542037964, "learning_rate": 0.00015942878232269388, "loss": 1.6473, "step": 8894 }, { "epoch": 0.3185488925098931, "grad_norm": 1.6162723302841187, "learning_rate": 0.00015941945338852044, "loss": 1.1406, "step": 8895 }, { "epoch": 0.31858470463946137, "grad_norm": 1.8925987482070923, "learning_rate": 0.0001594101236549349, "loss": 1.3241, "step": 8896 }, { "epoch": 0.3186205167690297, "grad_norm": 1.586099624633789, "learning_rate": 0.00015940079312206276, "loss": 1.3499, "step": 8897 }, { "epoch": 0.31865632889859796, "grad_norm": 1.7400895357131958, "learning_rate": 0.00015939146179002957, "loss": 1.5616, "step": 8898 }, { "epoch": 0.31869214102816623, "grad_norm": 1.7760902643203735, "learning_rate": 0.00015938212965896088, "loss": 1.4622, "step": 8899 }, { "epoch": 0.3187279531577345, "grad_norm": 1.524623990058899, "learning_rate": 0.00015937279672898223, "loss": 1.5722, "step": 8900 }, { "epoch": 0.3187637652873028, "grad_norm": 1.5912421941757202, "learning_rate": 0.0001593634630002192, "loss": 1.4204, "step": 8901 }, { "epoch": 0.3187995774168711, "grad_norm": 2.2556264400482178, "learning_rate": 0.00015935412847279735, "loss": 1.7675, "step": 8902 }, { "epoch": 0.31883538954643936, "grad_norm": 1.4288015365600586, "learning_rate": 0.00015934479314684224, "loss": 1.4929, "step": 8903 }, { "epoch": 0.3188712016760077, "grad_norm": 1.7608261108398438, "learning_rate": 0.00015933545702247952, "loss": 1.6204, "step": 8904 }, { "epoch": 0.31890701380557596, "grad_norm": 1.5694899559020996, "learning_rate": 0.00015932612009983475, "loss": 1.5306, "step": 8905 }, { "epoch": 0.3189428259351442, "grad_norm": 1.9936593770980835, "learning_rate": 0.00015931678237903353, "loss": 1.7889, "step": 8906 }, { "epoch": 0.3189786380647125, "grad_norm": 1.8415395021438599, "learning_rate": 0.00015930744386020152, "loss": 1.3515, "step": 8907 }, { "epoch": 0.3190144501942808, "grad_norm": 2.319857597351074, "learning_rate": 0.0001592981045434644, "loss": 1.338, "step": 8908 }, { "epoch": 0.3190502623238491, "grad_norm": 1.5549952983856201, "learning_rate": 0.0001592887644289477, "loss": 1.5115, "step": 8909 }, { "epoch": 0.31908607445341736, "grad_norm": 2.5317156314849854, "learning_rate": 0.0001592794235167772, "loss": 1.2207, "step": 8910 }, { "epoch": 0.3191218865829857, "grad_norm": 1.3313252925872803, "learning_rate": 0.00015927008180707854, "loss": 1.5237, "step": 8911 }, { "epoch": 0.31915769871255395, "grad_norm": 2.1463143825531006, "learning_rate": 0.00015926073929997735, "loss": 1.4248, "step": 8912 }, { "epoch": 0.3191935108421222, "grad_norm": 1.8104480504989624, "learning_rate": 0.00015925139599559939, "loss": 1.5752, "step": 8913 }, { "epoch": 0.3192293229716905, "grad_norm": 2.5905628204345703, "learning_rate": 0.0001592420518940703, "loss": 1.5152, "step": 8914 }, { "epoch": 0.3192651351012588, "grad_norm": 2.154703140258789, "learning_rate": 0.0001592327069955158, "loss": 1.8066, "step": 8915 }, { "epoch": 0.3193009472308271, "grad_norm": 1.2889586687088013, "learning_rate": 0.00015922336130006162, "loss": 1.2934, "step": 8916 }, { "epoch": 0.31933675936039535, "grad_norm": 1.5765522718429565, "learning_rate": 0.00015921401480783356, "loss": 1.7927, "step": 8917 }, { "epoch": 0.3193725714899637, "grad_norm": 1.3909289836883545, "learning_rate": 0.0001592046675189573, "loss": 1.4859, "step": 8918 }, { "epoch": 0.31940838361953194, "grad_norm": 1.4396029710769653, "learning_rate": 0.00015919531943355857, "loss": 1.3662, "step": 8919 }, { "epoch": 0.3194441957491002, "grad_norm": 2.2816967964172363, "learning_rate": 0.0001591859705517632, "loss": 1.678, "step": 8920 }, { "epoch": 0.3194800078786685, "grad_norm": 2.0098812580108643, "learning_rate": 0.00015917662087369693, "loss": 1.8597, "step": 8921 }, { "epoch": 0.3195158200082368, "grad_norm": 1.5743976831436157, "learning_rate": 0.0001591672703994856, "loss": 1.3576, "step": 8922 }, { "epoch": 0.3195516321378051, "grad_norm": 1.5148391723632812, "learning_rate": 0.00015915791912925493, "loss": 1.4273, "step": 8923 }, { "epoch": 0.31958744426737334, "grad_norm": 1.6980741024017334, "learning_rate": 0.00015914856706313076, "loss": 1.4308, "step": 8924 }, { "epoch": 0.31962325639694167, "grad_norm": 1.7159067392349243, "learning_rate": 0.00015913921420123892, "loss": 1.843, "step": 8925 }, { "epoch": 0.31965906852650994, "grad_norm": 1.428824782371521, "learning_rate": 0.00015912986054370524, "loss": 1.4445, "step": 8926 }, { "epoch": 0.3196948806560782, "grad_norm": 1.6858423948287964, "learning_rate": 0.00015912050609065556, "loss": 1.1656, "step": 8927 }, { "epoch": 0.3197306927856465, "grad_norm": 1.671278476715088, "learning_rate": 0.00015911115084221575, "loss": 1.7137, "step": 8928 }, { "epoch": 0.3197665049152148, "grad_norm": 2.173511505126953, "learning_rate": 0.00015910179479851163, "loss": 1.6492, "step": 8929 }, { "epoch": 0.31980231704478307, "grad_norm": 1.9193826913833618, "learning_rate": 0.0001590924379596691, "loss": 1.5464, "step": 8930 }, { "epoch": 0.31983812917435134, "grad_norm": 1.7963279485702515, "learning_rate": 0.00015908308032581406, "loss": 1.7011, "step": 8931 }, { "epoch": 0.31987394130391966, "grad_norm": 1.4274190664291382, "learning_rate": 0.00015907372189707237, "loss": 1.4982, "step": 8932 }, { "epoch": 0.31990975343348793, "grad_norm": 1.926442265510559, "learning_rate": 0.00015906436267356993, "loss": 1.3027, "step": 8933 }, { "epoch": 0.3199455655630562, "grad_norm": 1.577248454093933, "learning_rate": 0.00015905500265543272, "loss": 1.4747, "step": 8934 }, { "epoch": 0.31998137769262447, "grad_norm": 2.0698063373565674, "learning_rate": 0.0001590456418427866, "loss": 1.6706, "step": 8935 }, { "epoch": 0.3200171898221928, "grad_norm": 1.6844439506530762, "learning_rate": 0.00015903628023575755, "loss": 1.2918, "step": 8936 }, { "epoch": 0.32005300195176106, "grad_norm": 1.304545283317566, "learning_rate": 0.00015902691783447142, "loss": 1.4205, "step": 8937 }, { "epoch": 0.32008881408132933, "grad_norm": 1.6640790700912476, "learning_rate": 0.00015901755463905434, "loss": 1.5702, "step": 8938 }, { "epoch": 0.32012462621089766, "grad_norm": 1.569692611694336, "learning_rate": 0.00015900819064963218, "loss": 1.5811, "step": 8939 }, { "epoch": 0.3201604383404659, "grad_norm": 1.8186932802200317, "learning_rate": 0.00015899882586633093, "loss": 1.2104, "step": 8940 }, { "epoch": 0.3201962504700342, "grad_norm": 1.6925221681594849, "learning_rate": 0.00015898946028927656, "loss": 1.5124, "step": 8941 }, { "epoch": 0.32023206259960246, "grad_norm": 1.6941040754318237, "learning_rate": 0.0001589800939185951, "loss": 1.5075, "step": 8942 }, { "epoch": 0.3202678747291708, "grad_norm": 1.9452513456344604, "learning_rate": 0.00015897072675441254, "loss": 1.5764, "step": 8943 }, { "epoch": 0.32030368685873906, "grad_norm": 2.2057533264160156, "learning_rate": 0.00015896135879685494, "loss": 1.7398, "step": 8944 }, { "epoch": 0.3203394989883073, "grad_norm": 1.8561280965805054, "learning_rate": 0.0001589519900460483, "loss": 1.5916, "step": 8945 }, { "epoch": 0.32037531111787565, "grad_norm": 1.8078523874282837, "learning_rate": 0.00015894262050211868, "loss": 1.4903, "step": 8946 }, { "epoch": 0.3204111232474439, "grad_norm": 1.5678856372833252, "learning_rate": 0.00015893325016519213, "loss": 1.5585, "step": 8947 }, { "epoch": 0.3204469353770122, "grad_norm": 2.330113172531128, "learning_rate": 0.0001589238790353947, "loss": 1.6519, "step": 8948 }, { "epoch": 0.32048274750658046, "grad_norm": 1.4870229959487915, "learning_rate": 0.00015891450711285254, "loss": 1.569, "step": 8949 }, { "epoch": 0.3205185596361488, "grad_norm": 2.701037883758545, "learning_rate": 0.00015890513439769164, "loss": 1.8232, "step": 8950 }, { "epoch": 0.32055437176571705, "grad_norm": 1.4118682146072388, "learning_rate": 0.00015889576089003814, "loss": 1.373, "step": 8951 }, { "epoch": 0.3205901838952853, "grad_norm": 1.6131876707077026, "learning_rate": 0.00015888638659001815, "loss": 1.503, "step": 8952 }, { "epoch": 0.32062599602485364, "grad_norm": 2.0327181816101074, "learning_rate": 0.0001588770114977578, "loss": 1.5179, "step": 8953 }, { "epoch": 0.3206618081544219, "grad_norm": 1.8969197273254395, "learning_rate": 0.00015886763561338317, "loss": 1.8301, "step": 8954 }, { "epoch": 0.3206976202839902, "grad_norm": 1.4035693407058716, "learning_rate": 0.00015885825893702048, "loss": 1.6315, "step": 8955 }, { "epoch": 0.32073343241355845, "grad_norm": 1.4035834074020386, "learning_rate": 0.0001588488814687958, "loss": 1.5281, "step": 8956 }, { "epoch": 0.3207692445431268, "grad_norm": 1.6486170291900635, "learning_rate": 0.00015883950320883536, "loss": 1.3552, "step": 8957 }, { "epoch": 0.32080505667269504, "grad_norm": 1.6082379817962646, "learning_rate": 0.0001588301241572653, "loss": 1.5837, "step": 8958 }, { "epoch": 0.3208408688022633, "grad_norm": 1.3907922506332397, "learning_rate": 0.0001588207443142118, "loss": 1.6525, "step": 8959 }, { "epoch": 0.32087668093183164, "grad_norm": 1.9259198904037476, "learning_rate": 0.00015881136367980103, "loss": 1.6522, "step": 8960 }, { "epoch": 0.3209124930613999, "grad_norm": 1.92613685131073, "learning_rate": 0.00015880198225415925, "loss": 1.2868, "step": 8961 }, { "epoch": 0.3209483051909682, "grad_norm": 1.6421856880187988, "learning_rate": 0.00015879260003741265, "loss": 1.7828, "step": 8962 }, { "epoch": 0.32098411732053644, "grad_norm": 1.4043571949005127, "learning_rate": 0.00015878321702968745, "loss": 1.6435, "step": 8963 }, { "epoch": 0.32101992945010477, "grad_norm": 2.38551926612854, "learning_rate": 0.0001587738332311099, "loss": 2.2681, "step": 8964 }, { "epoch": 0.32105574157967304, "grad_norm": 1.5245349407196045, "learning_rate": 0.0001587644486418062, "loss": 1.5347, "step": 8965 }, { "epoch": 0.3210915537092413, "grad_norm": 1.5355474948883057, "learning_rate": 0.00015875506326190267, "loss": 1.2685, "step": 8966 }, { "epoch": 0.32112736583880963, "grad_norm": 1.4486896991729736, "learning_rate": 0.00015874567709152557, "loss": 1.221, "step": 8967 }, { "epoch": 0.3211631779683779, "grad_norm": 1.4446889162063599, "learning_rate": 0.00015873629013080114, "loss": 1.5916, "step": 8968 }, { "epoch": 0.32119899009794617, "grad_norm": 2.2588870525360107, "learning_rate": 0.0001587269023798557, "loss": 1.5266, "step": 8969 }, { "epoch": 0.32123480222751444, "grad_norm": 1.638065218925476, "learning_rate": 0.0001587175138388155, "loss": 1.5114, "step": 8970 }, { "epoch": 0.32127061435708276, "grad_norm": 2.1063241958618164, "learning_rate": 0.00015870812450780695, "loss": 1.5352, "step": 8971 }, { "epoch": 0.32130642648665103, "grad_norm": 1.6843229532241821, "learning_rate": 0.00015869873438695628, "loss": 1.4685, "step": 8972 }, { "epoch": 0.3213422386162193, "grad_norm": 1.2848390340805054, "learning_rate": 0.00015868934347638985, "loss": 1.3653, "step": 8973 }, { "epoch": 0.3213780507457876, "grad_norm": 1.7618560791015625, "learning_rate": 0.00015867995177623403, "loss": 1.5555, "step": 8974 }, { "epoch": 0.3214138628753559, "grad_norm": 2.0183522701263428, "learning_rate": 0.00015867055928661517, "loss": 1.6017, "step": 8975 }, { "epoch": 0.32144967500492416, "grad_norm": 1.6978468894958496, "learning_rate": 0.00015866116600765957, "loss": 1.493, "step": 8976 }, { "epoch": 0.32148548713449243, "grad_norm": 1.5752780437469482, "learning_rate": 0.00015865177193949366, "loss": 1.5721, "step": 8977 }, { "epoch": 0.32152129926406076, "grad_norm": 1.5918536186218262, "learning_rate": 0.0001586423770822438, "loss": 1.4768, "step": 8978 }, { "epoch": 0.321557111393629, "grad_norm": 1.8312736749649048, "learning_rate": 0.0001586329814360364, "loss": 1.6069, "step": 8979 }, { "epoch": 0.3215929235231973, "grad_norm": 1.5193839073181152, "learning_rate": 0.0001586235850009979, "loss": 1.386, "step": 8980 }, { "epoch": 0.3216287356527656, "grad_norm": 2.7475154399871826, "learning_rate": 0.00015861418777725467, "loss": 1.451, "step": 8981 }, { "epoch": 0.3216645477823339, "grad_norm": 1.462113618850708, "learning_rate": 0.00015860478976493313, "loss": 1.4151, "step": 8982 }, { "epoch": 0.32170035991190216, "grad_norm": 1.9317468404769897, "learning_rate": 0.00015859539096415976, "loss": 1.6592, "step": 8983 }, { "epoch": 0.3217361720414704, "grad_norm": 1.8942067623138428, "learning_rate": 0.000158585991375061, "loss": 1.643, "step": 8984 }, { "epoch": 0.32177198417103875, "grad_norm": 1.6340413093566895, "learning_rate": 0.00015857659099776327, "loss": 1.5538, "step": 8985 }, { "epoch": 0.321807796300607, "grad_norm": 1.7912198305130005, "learning_rate": 0.0001585671898323931, "loss": 1.7632, "step": 8986 }, { "epoch": 0.3218436084301753, "grad_norm": 1.6519590616226196, "learning_rate": 0.0001585577878790769, "loss": 1.1778, "step": 8987 }, { "epoch": 0.3218794205597436, "grad_norm": 1.6828880310058594, "learning_rate": 0.00015854838513794118, "loss": 1.7943, "step": 8988 }, { "epoch": 0.3219152326893119, "grad_norm": 1.6437028646469116, "learning_rate": 0.00015853898160911252, "loss": 1.8893, "step": 8989 }, { "epoch": 0.32195104481888015, "grad_norm": 1.3070831298828125, "learning_rate": 0.00015852957729271735, "loss": 1.1287, "step": 8990 }, { "epoch": 0.3219868569484484, "grad_norm": 1.915002703666687, "learning_rate": 0.00015852017218888218, "loss": 1.3972, "step": 8991 }, { "epoch": 0.32202266907801674, "grad_norm": 1.2701020240783691, "learning_rate": 0.0001585107662977336, "loss": 1.5031, "step": 8992 }, { "epoch": 0.322058481207585, "grad_norm": 1.6998685598373413, "learning_rate": 0.00015850135961939814, "loss": 1.2343, "step": 8993 }, { "epoch": 0.3220942933371533, "grad_norm": 1.3520300388336182, "learning_rate": 0.00015849195215400234, "loss": 1.6848, "step": 8994 }, { "epoch": 0.3221301054667216, "grad_norm": 1.4436218738555908, "learning_rate": 0.0001584825439016728, "loss": 1.5324, "step": 8995 }, { "epoch": 0.3221659175962899, "grad_norm": 2.294487953186035, "learning_rate": 0.00015847313486253603, "loss": 1.8497, "step": 8996 }, { "epoch": 0.32220172972585814, "grad_norm": 1.5929428339004517, "learning_rate": 0.0001584637250367187, "loss": 1.4359, "step": 8997 }, { "epoch": 0.3222375418554264, "grad_norm": 2.296165943145752, "learning_rate": 0.00015845431442434733, "loss": 1.8707, "step": 8998 }, { "epoch": 0.32227335398499474, "grad_norm": 1.5806833505630493, "learning_rate": 0.00015844490302554856, "loss": 1.6144, "step": 8999 }, { "epoch": 0.322309166114563, "grad_norm": 1.7950143814086914, "learning_rate": 0.00015843549084044903, "loss": 1.7005, "step": 9000 }, { "epoch": 0.3223449782441313, "grad_norm": 2.0502243041992188, "learning_rate": 0.0001584260778691753, "loss": 1.5181, "step": 9001 }, { "epoch": 0.3223807903736996, "grad_norm": 1.9354183673858643, "learning_rate": 0.00015841666411185411, "loss": 1.3239, "step": 9002 }, { "epoch": 0.32241660250326787, "grad_norm": 1.4576351642608643, "learning_rate": 0.000158407249568612, "loss": 1.46, "step": 9003 }, { "epoch": 0.32245241463283614, "grad_norm": 1.6081417798995972, "learning_rate": 0.00015839783423957576, "loss": 1.4387, "step": 9004 }, { "epoch": 0.3224882267624044, "grad_norm": 2.0253992080688477, "learning_rate": 0.00015838841812487194, "loss": 1.7281, "step": 9005 }, { "epoch": 0.32252403889197273, "grad_norm": 1.742729902267456, "learning_rate": 0.00015837900122462725, "loss": 1.3755, "step": 9006 }, { "epoch": 0.322559851021541, "grad_norm": 2.4512531757354736, "learning_rate": 0.00015836958353896845, "loss": 1.3772, "step": 9007 }, { "epoch": 0.32259566315110927, "grad_norm": 1.7238280773162842, "learning_rate": 0.00015836016506802218, "loss": 1.3526, "step": 9008 }, { "epoch": 0.3226314752806776, "grad_norm": 2.1217103004455566, "learning_rate": 0.00015835074581191516, "loss": 1.7, "step": 9009 }, { "epoch": 0.32266728741024586, "grad_norm": 1.888587474822998, "learning_rate": 0.00015834132577077412, "loss": 1.5873, "step": 9010 }, { "epoch": 0.32270309953981413, "grad_norm": 1.8031010627746582, "learning_rate": 0.00015833190494472582, "loss": 1.8754, "step": 9011 }, { "epoch": 0.3227389116693824, "grad_norm": 1.394250750541687, "learning_rate": 0.00015832248333389693, "loss": 1.5981, "step": 9012 }, { "epoch": 0.3227747237989507, "grad_norm": 1.8603804111480713, "learning_rate": 0.00015831306093841432, "loss": 1.4249, "step": 9013 }, { "epoch": 0.322810535928519, "grad_norm": 2.721745014190674, "learning_rate": 0.00015830363775840467, "loss": 1.3469, "step": 9014 }, { "epoch": 0.32284634805808726, "grad_norm": 1.7731702327728271, "learning_rate": 0.00015829421379399475, "loss": 1.742, "step": 9015 }, { "epoch": 0.32288216018765553, "grad_norm": 1.3356654644012451, "learning_rate": 0.00015828478904531142, "loss": 1.4635, "step": 9016 }, { "epoch": 0.32291797231722386, "grad_norm": 1.3644803762435913, "learning_rate": 0.0001582753635124814, "loss": 1.3199, "step": 9017 }, { "epoch": 0.3229537844467921, "grad_norm": 1.669406771659851, "learning_rate": 0.00015826593719563156, "loss": 1.3352, "step": 9018 }, { "epoch": 0.3229895965763604, "grad_norm": 1.548660397529602, "learning_rate": 0.0001582565100948887, "loss": 1.5234, "step": 9019 }, { "epoch": 0.3230254087059287, "grad_norm": 1.9580634832382202, "learning_rate": 0.00015824708221037965, "loss": 1.5996, "step": 9020 }, { "epoch": 0.323061220835497, "grad_norm": 1.9380831718444824, "learning_rate": 0.0001582376535422312, "loss": 1.245, "step": 9021 }, { "epoch": 0.32309703296506526, "grad_norm": 1.6162350177764893, "learning_rate": 0.00015822822409057024, "loss": 1.4507, "step": 9022 }, { "epoch": 0.3231328450946335, "grad_norm": 1.2511042356491089, "learning_rate": 0.00015821879385552367, "loss": 1.3691, "step": 9023 }, { "epoch": 0.32316865722420185, "grad_norm": 1.4707595109939575, "learning_rate": 0.00015820936283721834, "loss": 1.6036, "step": 9024 }, { "epoch": 0.3232044693537701, "grad_norm": 2.1324875354766846, "learning_rate": 0.00015819993103578106, "loss": 1.4427, "step": 9025 }, { "epoch": 0.3232402814833384, "grad_norm": 1.6112456321716309, "learning_rate": 0.0001581904984513388, "loss": 1.3933, "step": 9026 }, { "epoch": 0.3232760936129067, "grad_norm": 2.6338889598846436, "learning_rate": 0.00015818106508401847, "loss": 1.508, "step": 9027 }, { "epoch": 0.323311905742475, "grad_norm": 2.0561411380767822, "learning_rate": 0.00015817163093394693, "loss": 1.3873, "step": 9028 }, { "epoch": 0.32334771787204325, "grad_norm": 1.4188092947006226, "learning_rate": 0.00015816219600125114, "loss": 1.3726, "step": 9029 }, { "epoch": 0.3233835300016115, "grad_norm": 2.1878104209899902, "learning_rate": 0.00015815276028605807, "loss": 1.3186, "step": 9030 }, { "epoch": 0.32341934213117984, "grad_norm": 1.4584554433822632, "learning_rate": 0.00015814332378849457, "loss": 1.5844, "step": 9031 }, { "epoch": 0.3234551542607481, "grad_norm": 2.6032674312591553, "learning_rate": 0.00015813388650868766, "loss": 1.4215, "step": 9032 }, { "epoch": 0.3234909663903164, "grad_norm": 1.6895302534103394, "learning_rate": 0.00015812444844676428, "loss": 1.5978, "step": 9033 }, { "epoch": 0.3235267785198847, "grad_norm": 1.8651604652404785, "learning_rate": 0.00015811500960285143, "loss": 1.3851, "step": 9034 }, { "epoch": 0.323562590649453, "grad_norm": 1.7477426528930664, "learning_rate": 0.00015810556997707608, "loss": 1.2799, "step": 9035 }, { "epoch": 0.32359840277902124, "grad_norm": 1.9270200729370117, "learning_rate": 0.00015809612956956527, "loss": 1.335, "step": 9036 }, { "epoch": 0.3236342149085895, "grad_norm": 1.5941219329833984, "learning_rate": 0.00015808668838044595, "loss": 1.5451, "step": 9037 }, { "epoch": 0.32367002703815784, "grad_norm": 2.0007755756378174, "learning_rate": 0.00015807724640984518, "loss": 1.4461, "step": 9038 }, { "epoch": 0.3237058391677261, "grad_norm": 2.1980857849121094, "learning_rate": 0.00015806780365788998, "loss": 1.8456, "step": 9039 }, { "epoch": 0.3237416512972944, "grad_norm": 4.429500579833984, "learning_rate": 0.00015805836012470733, "loss": 1.8815, "step": 9040 }, { "epoch": 0.3237774634268627, "grad_norm": 1.9881982803344727, "learning_rate": 0.0001580489158104244, "loss": 1.5833, "step": 9041 }, { "epoch": 0.32381327555643097, "grad_norm": 2.5081233978271484, "learning_rate": 0.00015803947071516813, "loss": 1.2176, "step": 9042 }, { "epoch": 0.32384908768599924, "grad_norm": 1.551257848739624, "learning_rate": 0.00015803002483906568, "loss": 1.6953, "step": 9043 }, { "epoch": 0.3238848998155675, "grad_norm": 1.6371716260910034, "learning_rate": 0.0001580205781822441, "loss": 1.4023, "step": 9044 }, { "epoch": 0.32392071194513583, "grad_norm": 1.3484255075454712, "learning_rate": 0.00015801113074483046, "loss": 1.6748, "step": 9045 }, { "epoch": 0.3239565240747041, "grad_norm": 1.9768213033676147, "learning_rate": 0.0001580016825269519, "loss": 1.6158, "step": 9046 }, { "epoch": 0.32399233620427237, "grad_norm": 1.9486857652664185, "learning_rate": 0.00015799223352873555, "loss": 1.4215, "step": 9047 }, { "epoch": 0.3240281483338407, "grad_norm": 1.4396650791168213, "learning_rate": 0.00015798278375030845, "loss": 1.1456, "step": 9048 }, { "epoch": 0.32406396046340896, "grad_norm": 1.3435920476913452, "learning_rate": 0.0001579733331917978, "loss": 1.4729, "step": 9049 }, { "epoch": 0.32409977259297723, "grad_norm": 1.549360990524292, "learning_rate": 0.00015796388185333076, "loss": 1.4954, "step": 9050 }, { "epoch": 0.3241355847225455, "grad_norm": 1.8293956518173218, "learning_rate": 0.00015795442973503442, "loss": 1.6314, "step": 9051 }, { "epoch": 0.3241713968521138, "grad_norm": 1.7965165376663208, "learning_rate": 0.00015794497683703601, "loss": 1.9173, "step": 9052 }, { "epoch": 0.3242072089816821, "grad_norm": 1.5780364274978638, "learning_rate": 0.00015793552315946266, "loss": 1.5728, "step": 9053 }, { "epoch": 0.32424302111125036, "grad_norm": 3.517538547515869, "learning_rate": 0.00015792606870244162, "loss": 1.7913, "step": 9054 }, { "epoch": 0.3242788332408187, "grad_norm": 1.9409570693969727, "learning_rate": 0.0001579166134661, "loss": 1.3348, "step": 9055 }, { "epoch": 0.32431464537038696, "grad_norm": 1.7771880626678467, "learning_rate": 0.00015790715745056506, "loss": 1.6459, "step": 9056 }, { "epoch": 0.3243504574999552, "grad_norm": 1.7616101503372192, "learning_rate": 0.00015789770065596404, "loss": 1.7816, "step": 9057 }, { "epoch": 0.3243862696295235, "grad_norm": 1.777740478515625, "learning_rate": 0.00015788824308242408, "loss": 1.8904, "step": 9058 }, { "epoch": 0.3244220817590918, "grad_norm": 2.1632614135742188, "learning_rate": 0.00015787878473007253, "loss": 1.6462, "step": 9059 }, { "epoch": 0.3244578938886601, "grad_norm": 2.3858914375305176, "learning_rate": 0.00015786932559903657, "loss": 1.6606, "step": 9060 }, { "epoch": 0.32449370601822836, "grad_norm": 1.9479713439941406, "learning_rate": 0.00015785986568944352, "loss": 1.7255, "step": 9061 }, { "epoch": 0.3245295181477967, "grad_norm": 1.4364668130874634, "learning_rate": 0.00015785040500142057, "loss": 1.5002, "step": 9062 }, { "epoch": 0.32456533027736495, "grad_norm": 2.068448066711426, "learning_rate": 0.00015784094353509507, "loss": 1.4521, "step": 9063 }, { "epoch": 0.3246011424069332, "grad_norm": 2.5338597297668457, "learning_rate": 0.00015783148129059425, "loss": 1.5654, "step": 9064 }, { "epoch": 0.3246369545365015, "grad_norm": 2.1325583457946777, "learning_rate": 0.00015782201826804548, "loss": 1.6559, "step": 9065 }, { "epoch": 0.3246727666660698, "grad_norm": 1.665851354598999, "learning_rate": 0.000157812554467576, "loss": 1.3783, "step": 9066 }, { "epoch": 0.3247085787956381, "grad_norm": 1.4407624006271362, "learning_rate": 0.0001578030898893132, "loss": 1.6831, "step": 9067 }, { "epoch": 0.32474439092520635, "grad_norm": 1.782218098640442, "learning_rate": 0.00015779362453338438, "loss": 1.7928, "step": 9068 }, { "epoch": 0.3247802030547747, "grad_norm": 1.5871248245239258, "learning_rate": 0.0001577841583999169, "loss": 1.4532, "step": 9069 }, { "epoch": 0.32481601518434294, "grad_norm": 1.4326859712600708, "learning_rate": 0.00015777469148903808, "loss": 1.5741, "step": 9070 }, { "epoch": 0.3248518273139112, "grad_norm": 1.2834270000457764, "learning_rate": 0.00015776522380087532, "loss": 1.5228, "step": 9071 }, { "epoch": 0.3248876394434795, "grad_norm": 1.9993133544921875, "learning_rate": 0.00015775575533555602, "loss": 1.6291, "step": 9072 }, { "epoch": 0.3249234515730478, "grad_norm": 1.301542043685913, "learning_rate": 0.0001577462860932075, "loss": 1.4781, "step": 9073 }, { "epoch": 0.3249592637026161, "grad_norm": 1.7204660177230835, "learning_rate": 0.00015773681607395717, "loss": 1.2986, "step": 9074 }, { "epoch": 0.32499507583218434, "grad_norm": 2.050814628601074, "learning_rate": 0.0001577273452779325, "loss": 1.8979, "step": 9075 }, { "epoch": 0.32503088796175267, "grad_norm": 1.3881765604019165, "learning_rate": 0.00015771787370526084, "loss": 1.5927, "step": 9076 }, { "epoch": 0.32506670009132094, "grad_norm": 1.4091527462005615, "learning_rate": 0.0001577084013560696, "loss": 1.3812, "step": 9077 }, { "epoch": 0.3251025122208892, "grad_norm": 2.129033088684082, "learning_rate": 0.0001576989282304863, "loss": 1.4002, "step": 9078 }, { "epoch": 0.3251383243504575, "grad_norm": 1.6239925622940063, "learning_rate": 0.00015768945432863835, "loss": 1.4022, "step": 9079 }, { "epoch": 0.3251741364800258, "grad_norm": 2.6688060760498047, "learning_rate": 0.00015767997965065322, "loss": 1.7299, "step": 9080 }, { "epoch": 0.32520994860959407, "grad_norm": 1.6006053686141968, "learning_rate": 0.00015767050419665836, "loss": 1.4682, "step": 9081 }, { "epoch": 0.32524576073916234, "grad_norm": 1.31721830368042, "learning_rate": 0.00015766102796678123, "loss": 1.5401, "step": 9082 }, { "epoch": 0.32528157286873066, "grad_norm": 2.0746214389801025, "learning_rate": 0.00015765155096114934, "loss": 1.6972, "step": 9083 }, { "epoch": 0.32531738499829893, "grad_norm": 1.4532837867736816, "learning_rate": 0.00015764207317989023, "loss": 1.1788, "step": 9084 }, { "epoch": 0.3253531971278672, "grad_norm": 1.7890650033950806, "learning_rate": 0.00015763259462313136, "loss": 1.5181, "step": 9085 }, { "epoch": 0.32538900925743547, "grad_norm": 1.5378397703170776, "learning_rate": 0.00015762311529100024, "loss": 1.7676, "step": 9086 }, { "epoch": 0.3254248213870038, "grad_norm": 1.4087746143341064, "learning_rate": 0.00015761363518362447, "loss": 1.5524, "step": 9087 }, { "epoch": 0.32546063351657206, "grad_norm": 1.3853182792663574, "learning_rate": 0.00015760415430113157, "loss": 1.6659, "step": 9088 }, { "epoch": 0.32549644564614033, "grad_norm": 1.8238393068313599, "learning_rate": 0.00015759467264364905, "loss": 1.7841, "step": 9089 }, { "epoch": 0.32553225777570866, "grad_norm": 1.984938383102417, "learning_rate": 0.00015758519021130451, "loss": 1.5044, "step": 9090 }, { "epoch": 0.3255680699052769, "grad_norm": 1.9241176843643188, "learning_rate": 0.0001575757070042255, "loss": 1.658, "step": 9091 }, { "epoch": 0.3256038820348452, "grad_norm": 1.9488638639450073, "learning_rate": 0.00015756622302253966, "loss": 1.5738, "step": 9092 }, { "epoch": 0.32563969416441346, "grad_norm": 1.1886811256408691, "learning_rate": 0.0001575567382663745, "loss": 1.5968, "step": 9093 }, { "epoch": 0.3256755062939818, "grad_norm": 1.2945717573165894, "learning_rate": 0.00015754725273585767, "loss": 1.3245, "step": 9094 }, { "epoch": 0.32571131842355006, "grad_norm": 1.674601674079895, "learning_rate": 0.0001575377664311168, "loss": 1.521, "step": 9095 }, { "epoch": 0.3257471305531183, "grad_norm": 1.8002949953079224, "learning_rate": 0.00015752827935227952, "loss": 1.4273, "step": 9096 }, { "epoch": 0.32578294268268665, "grad_norm": 2.0485355854034424, "learning_rate": 0.00015751879149947343, "loss": 1.6217, "step": 9097 }, { "epoch": 0.3258187548122549, "grad_norm": 1.7260215282440186, "learning_rate": 0.0001575093028728262, "loss": 1.1519, "step": 9098 }, { "epoch": 0.3258545669418232, "grad_norm": 2.4711947441101074, "learning_rate": 0.00015749981347246549, "loss": 1.5758, "step": 9099 }, { "epoch": 0.32589037907139146, "grad_norm": 1.7342562675476074, "learning_rate": 0.00015749032329851894, "loss": 1.3542, "step": 9100 }, { "epoch": 0.3259261912009598, "grad_norm": 1.5560182332992554, "learning_rate": 0.00015748083235111424, "loss": 1.4425, "step": 9101 }, { "epoch": 0.32596200333052805, "grad_norm": 1.55160653591156, "learning_rate": 0.00015747134063037908, "loss": 1.7229, "step": 9102 }, { "epoch": 0.3259978154600963, "grad_norm": 1.796183466911316, "learning_rate": 0.0001574618481364412, "loss": 1.5933, "step": 9103 }, { "epoch": 0.32603362758966464, "grad_norm": 1.8876303434371948, "learning_rate": 0.00015745235486942826, "loss": 1.6456, "step": 9104 }, { "epoch": 0.3260694397192329, "grad_norm": 1.37518310546875, "learning_rate": 0.00015744286082946797, "loss": 1.3399, "step": 9105 }, { "epoch": 0.3261052518488012, "grad_norm": 1.4342260360717773, "learning_rate": 0.0001574333660166881, "loss": 1.5002, "step": 9106 }, { "epoch": 0.32614106397836945, "grad_norm": 1.8933452367782593, "learning_rate": 0.0001574238704312164, "loss": 1.2322, "step": 9107 }, { "epoch": 0.3261768761079378, "grad_norm": 1.6854076385498047, "learning_rate": 0.00015741437407318056, "loss": 1.6315, "step": 9108 }, { "epoch": 0.32621268823750604, "grad_norm": 1.8995707035064697, "learning_rate": 0.00015740487694270838, "loss": 1.4575, "step": 9109 }, { "epoch": 0.3262485003670743, "grad_norm": 2.63657283782959, "learning_rate": 0.00015739537903992765, "loss": 1.3891, "step": 9110 }, { "epoch": 0.32628431249664264, "grad_norm": 1.7770910263061523, "learning_rate": 0.0001573858803649661, "loss": 1.4237, "step": 9111 }, { "epoch": 0.3263201246262109, "grad_norm": 2.1773881912231445, "learning_rate": 0.00015737638091795157, "loss": 1.6328, "step": 9112 }, { "epoch": 0.3263559367557792, "grad_norm": 1.9160898923873901, "learning_rate": 0.00015736688069901183, "loss": 1.5335, "step": 9113 }, { "epoch": 0.32639174888534744, "grad_norm": 1.9974308013916016, "learning_rate": 0.00015735737970827473, "loss": 1.1659, "step": 9114 }, { "epoch": 0.32642756101491577, "grad_norm": 1.807265281677246, "learning_rate": 0.00015734787794586806, "loss": 1.5504, "step": 9115 }, { "epoch": 0.32646337314448404, "grad_norm": 1.8353880643844604, "learning_rate": 0.00015733837541191968, "loss": 1.6066, "step": 9116 }, { "epoch": 0.3264991852740523, "grad_norm": 1.676720380783081, "learning_rate": 0.00015732887210655742, "loss": 1.4618, "step": 9117 }, { "epoch": 0.32653499740362063, "grad_norm": 1.6320055723190308, "learning_rate": 0.00015731936802990912, "loss": 1.7968, "step": 9118 }, { "epoch": 0.3265708095331889, "grad_norm": 2.2749667167663574, "learning_rate": 0.00015730986318210265, "loss": 1.4673, "step": 9119 }, { "epoch": 0.32660662166275717, "grad_norm": 1.3211688995361328, "learning_rate": 0.00015730035756326592, "loss": 1.5625, "step": 9120 }, { "epoch": 0.32664243379232544, "grad_norm": 1.8194984197616577, "learning_rate": 0.00015729085117352674, "loss": 1.4496, "step": 9121 }, { "epoch": 0.32667824592189376, "grad_norm": 2.0746090412139893, "learning_rate": 0.00015728134401301312, "loss": 1.2436, "step": 9122 }, { "epoch": 0.32671405805146203, "grad_norm": 1.5881251096725464, "learning_rate": 0.0001572718360818529, "loss": 1.3511, "step": 9123 }, { "epoch": 0.3267498701810303, "grad_norm": 1.9258278608322144, "learning_rate": 0.00015726232738017397, "loss": 1.5968, "step": 9124 }, { "epoch": 0.3267856823105986, "grad_norm": 1.449415922164917, "learning_rate": 0.00015725281790810431, "loss": 1.5124, "step": 9125 }, { "epoch": 0.3268214944401669, "grad_norm": 1.274713158607483, "learning_rate": 0.00015724330766577182, "loss": 1.6398, "step": 9126 }, { "epoch": 0.32685730656973516, "grad_norm": 1.757870078086853, "learning_rate": 0.0001572337966533045, "loss": 1.8657, "step": 9127 }, { "epoch": 0.32689311869930343, "grad_norm": 1.3837286233901978, "learning_rate": 0.0001572242848708302, "loss": 1.5027, "step": 9128 }, { "epoch": 0.32692893082887176, "grad_norm": 2.0841174125671387, "learning_rate": 0.00015721477231847702, "loss": 1.5041, "step": 9129 }, { "epoch": 0.32696474295844, "grad_norm": 3.516465663909912, "learning_rate": 0.00015720525899637285, "loss": 1.6544, "step": 9130 }, { "epoch": 0.3270005550880083, "grad_norm": 1.4936449527740479, "learning_rate": 0.00015719574490464573, "loss": 1.5635, "step": 9131 }, { "epoch": 0.3270363672175766, "grad_norm": 2.2506232261657715, "learning_rate": 0.00015718623004342362, "loss": 1.719, "step": 9132 }, { "epoch": 0.3270721793471449, "grad_norm": 1.8259614706039429, "learning_rate": 0.00015717671441283458, "loss": 1.7564, "step": 9133 }, { "epoch": 0.32710799147671316, "grad_norm": 1.6399543285369873, "learning_rate": 0.0001571671980130066, "loss": 1.8228, "step": 9134 }, { "epoch": 0.3271438036062814, "grad_norm": 1.5702282190322876, "learning_rate": 0.00015715768084406765, "loss": 1.5844, "step": 9135 }, { "epoch": 0.32717961573584975, "grad_norm": 1.4354286193847656, "learning_rate": 0.0001571481629061459, "loss": 1.5932, "step": 9136 }, { "epoch": 0.327215427865418, "grad_norm": 1.8592087030410767, "learning_rate": 0.0001571386441993693, "loss": 1.686, "step": 9137 }, { "epoch": 0.3272512399949863, "grad_norm": 1.372339129447937, "learning_rate": 0.00015712912472386597, "loss": 1.682, "step": 9138 }, { "epoch": 0.3272870521245546, "grad_norm": 1.4004724025726318, "learning_rate": 0.00015711960447976393, "loss": 1.834, "step": 9139 }, { "epoch": 0.3273228642541229, "grad_norm": 1.7258496284484863, "learning_rate": 0.0001571100834671913, "loss": 1.549, "step": 9140 }, { "epoch": 0.32735867638369115, "grad_norm": 1.3979535102844238, "learning_rate": 0.00015710056168627618, "loss": 1.689, "step": 9141 }, { "epoch": 0.3273944885132594, "grad_norm": 2.24281907081604, "learning_rate": 0.00015709103913714664, "loss": 1.5228, "step": 9142 }, { "epoch": 0.32743030064282774, "grad_norm": 1.3384792804718018, "learning_rate": 0.0001570815158199308, "loss": 1.2907, "step": 9143 }, { "epoch": 0.327466112772396, "grad_norm": 1.8024048805236816, "learning_rate": 0.00015707199173475682, "loss": 1.4062, "step": 9144 }, { "epoch": 0.3275019249019643, "grad_norm": 2.6472220420837402, "learning_rate": 0.00015706246688175282, "loss": 1.2139, "step": 9145 }, { "epoch": 0.3275377370315326, "grad_norm": 1.628482699394226, "learning_rate": 0.00015705294126104692, "loss": 1.4098, "step": 9146 }, { "epoch": 0.3275735491611009, "grad_norm": 1.4141241312026978, "learning_rate": 0.00015704341487276726, "loss": 1.2319, "step": 9147 }, { "epoch": 0.32760936129066914, "grad_norm": 1.8727307319641113, "learning_rate": 0.00015703388771704205, "loss": 1.6832, "step": 9148 }, { "epoch": 0.3276451734202374, "grad_norm": 1.9481236934661865, "learning_rate": 0.00015702435979399946, "loss": 1.4832, "step": 9149 }, { "epoch": 0.32768098554980574, "grad_norm": 1.4255954027175903, "learning_rate": 0.00015701483110376762, "loss": 1.3543, "step": 9150 }, { "epoch": 0.327716797679374, "grad_norm": 2.0080718994140625, "learning_rate": 0.00015700530164647485, "loss": 1.6513, "step": 9151 }, { "epoch": 0.3277526098089423, "grad_norm": 1.8739054203033447, "learning_rate": 0.00015699577142224924, "loss": 1.1768, "step": 9152 }, { "epoch": 0.3277884219385106, "grad_norm": 1.5352083444595337, "learning_rate": 0.000156986240431219, "loss": 1.4779, "step": 9153 }, { "epoch": 0.32782423406807887, "grad_norm": 1.6548919677734375, "learning_rate": 0.00015697670867351247, "loss": 1.5669, "step": 9154 }, { "epoch": 0.32786004619764714, "grad_norm": 1.8532062768936157, "learning_rate": 0.0001569671761492578, "loss": 1.713, "step": 9155 }, { "epoch": 0.3278958583272154, "grad_norm": 1.4983744621276855, "learning_rate": 0.00015695764285858323, "loss": 1.7685, "step": 9156 }, { "epoch": 0.32793167045678373, "grad_norm": 1.8667110204696655, "learning_rate": 0.00015694810880161706, "loss": 1.3894, "step": 9157 }, { "epoch": 0.327967482586352, "grad_norm": 1.7034077644348145, "learning_rate": 0.00015693857397848756, "loss": 1.6046, "step": 9158 }, { "epoch": 0.32800329471592027, "grad_norm": 2.344395399093628, "learning_rate": 0.00015692903838932299, "loss": 1.5522, "step": 9159 }, { "epoch": 0.3280391068454886, "grad_norm": 2.167020797729492, "learning_rate": 0.00015691950203425162, "loss": 1.3704, "step": 9160 }, { "epoch": 0.32807491897505686, "grad_norm": 1.6182739734649658, "learning_rate": 0.0001569099649134018, "loss": 1.3054, "step": 9161 }, { "epoch": 0.32811073110462513, "grad_norm": 1.4349606037139893, "learning_rate": 0.0001569004270269018, "loss": 1.4676, "step": 9162 }, { "epoch": 0.3281465432341934, "grad_norm": 1.9794306755065918, "learning_rate": 0.00015689088837487995, "loss": 1.8758, "step": 9163 }, { "epoch": 0.3281823553637617, "grad_norm": 1.2538909912109375, "learning_rate": 0.00015688134895746459, "loss": 1.5755, "step": 9164 }, { "epoch": 0.32821816749333, "grad_norm": 2.244776487350464, "learning_rate": 0.000156871808774784, "loss": 1.6322, "step": 9165 }, { "epoch": 0.32825397962289826, "grad_norm": 1.843839406967163, "learning_rate": 0.00015686226782696662, "loss": 1.6366, "step": 9166 }, { "epoch": 0.3282897917524666, "grad_norm": 2.0505659580230713, "learning_rate": 0.0001568527261141408, "loss": 1.5395, "step": 9167 }, { "epoch": 0.32832560388203486, "grad_norm": 1.6145918369293213, "learning_rate": 0.00015684318363643485, "loss": 1.4893, "step": 9168 }, { "epoch": 0.3283614160116031, "grad_norm": 1.7534884214401245, "learning_rate": 0.0001568336403939772, "loss": 1.5143, "step": 9169 }, { "epoch": 0.3283972281411714, "grad_norm": 1.496999740600586, "learning_rate": 0.00015682409638689623, "loss": 1.6016, "step": 9170 }, { "epoch": 0.3284330402707397, "grad_norm": 1.4966206550598145, "learning_rate": 0.00015681455161532034, "loss": 1.311, "step": 9171 }, { "epoch": 0.328468852400308, "grad_norm": 1.686967372894287, "learning_rate": 0.00015680500607937793, "loss": 1.2972, "step": 9172 }, { "epoch": 0.32850466452987626, "grad_norm": 1.885436773300171, "learning_rate": 0.00015679545977919745, "loss": 1.7629, "step": 9173 }, { "epoch": 0.3285404766594446, "grad_norm": 1.4676927328109741, "learning_rate": 0.0001567859127149073, "loss": 1.7276, "step": 9174 }, { "epoch": 0.32857628878901285, "grad_norm": 1.650899887084961, "learning_rate": 0.00015677636488663595, "loss": 1.5655, "step": 9175 }, { "epoch": 0.3286121009185811, "grad_norm": 1.4728350639343262, "learning_rate": 0.00015676681629451185, "loss": 1.682, "step": 9176 }, { "epoch": 0.3286479130481494, "grad_norm": 1.5229905843734741, "learning_rate": 0.0001567572669386635, "loss": 1.628, "step": 9177 }, { "epoch": 0.3286837251777177, "grad_norm": 1.990729808807373, "learning_rate": 0.0001567477168192193, "loss": 1.9226, "step": 9178 }, { "epoch": 0.328719537307286, "grad_norm": 1.5891399383544922, "learning_rate": 0.00015673816593630776, "loss": 1.3932, "step": 9179 }, { "epoch": 0.32875534943685425, "grad_norm": 1.5668306350708008, "learning_rate": 0.00015672861429005737, "loss": 1.3301, "step": 9180 }, { "epoch": 0.3287911615664226, "grad_norm": 1.5943611860275269, "learning_rate": 0.00015671906188059672, "loss": 1.5854, "step": 9181 }, { "epoch": 0.32882697369599084, "grad_norm": 1.9587252140045166, "learning_rate": 0.0001567095087080542, "loss": 1.8593, "step": 9182 }, { "epoch": 0.3288627858255591, "grad_norm": 1.4713166952133179, "learning_rate": 0.00015669995477255838, "loss": 1.6878, "step": 9183 }, { "epoch": 0.3288985979551274, "grad_norm": 2.106905937194824, "learning_rate": 0.00015669040007423784, "loss": 1.5709, "step": 9184 }, { "epoch": 0.3289344100846957, "grad_norm": 2.200146198272705, "learning_rate": 0.00015668084461322108, "loss": 1.6029, "step": 9185 }, { "epoch": 0.328970222214264, "grad_norm": 1.6008678674697876, "learning_rate": 0.00015667128838963668, "loss": 1.6772, "step": 9186 }, { "epoch": 0.32900603434383224, "grad_norm": 1.828938603401184, "learning_rate": 0.00015666173140361315, "loss": 1.7683, "step": 9187 }, { "epoch": 0.32904184647340057, "grad_norm": 2.305866241455078, "learning_rate": 0.00015665217365527917, "loss": 1.7502, "step": 9188 }, { "epoch": 0.32907765860296884, "grad_norm": 2.0801093578338623, "learning_rate": 0.00015664261514476322, "loss": 1.3371, "step": 9189 }, { "epoch": 0.3291134707325371, "grad_norm": 1.4453667402267456, "learning_rate": 0.00015663305587219396, "loss": 1.477, "step": 9190 }, { "epoch": 0.3291492828621054, "grad_norm": 1.6667442321777344, "learning_rate": 0.00015662349583770002, "loss": 1.8024, "step": 9191 }, { "epoch": 0.3291850949916737, "grad_norm": 1.4693574905395508, "learning_rate": 0.00015661393504140994, "loss": 1.3105, "step": 9192 }, { "epoch": 0.32922090712124197, "grad_norm": 1.4278935194015503, "learning_rate": 0.0001566043734834524, "loss": 1.8824, "step": 9193 }, { "epoch": 0.32925671925081024, "grad_norm": 2.0216543674468994, "learning_rate": 0.00015659481116395604, "loss": 1.6132, "step": 9194 }, { "epoch": 0.32929253138037856, "grad_norm": 1.8408523797988892, "learning_rate": 0.0001565852480830495, "loss": 1.191, "step": 9195 }, { "epoch": 0.32932834350994683, "grad_norm": 1.845346450805664, "learning_rate": 0.00015657568424086145, "loss": 1.8479, "step": 9196 }, { "epoch": 0.3293641556395151, "grad_norm": 1.3020843267440796, "learning_rate": 0.0001565661196375205, "loss": 1.3387, "step": 9197 }, { "epoch": 0.32939996776908337, "grad_norm": 1.6613608598709106, "learning_rate": 0.00015655655427315542, "loss": 1.4907, "step": 9198 }, { "epoch": 0.3294357798986517, "grad_norm": 1.7599493265151978, "learning_rate": 0.00015654698814789484, "loss": 1.5625, "step": 9199 }, { "epoch": 0.32947159202821996, "grad_norm": 2.014738082885742, "learning_rate": 0.00015653742126186745, "loss": 1.6346, "step": 9200 }, { "epoch": 0.32950740415778823, "grad_norm": 1.7053759098052979, "learning_rate": 0.00015652785361520204, "loss": 1.7228, "step": 9201 }, { "epoch": 0.32954321628735656, "grad_norm": 1.6620781421661377, "learning_rate": 0.00015651828520802722, "loss": 1.5416, "step": 9202 }, { "epoch": 0.3295790284169248, "grad_norm": 1.5674805641174316, "learning_rate": 0.00015650871604047182, "loss": 1.6512, "step": 9203 }, { "epoch": 0.3296148405464931, "grad_norm": 1.6600102186203003, "learning_rate": 0.0001564991461126645, "loss": 1.8433, "step": 9204 }, { "epoch": 0.32965065267606136, "grad_norm": 2.119927167892456, "learning_rate": 0.00015648957542473406, "loss": 1.6382, "step": 9205 }, { "epoch": 0.3296864648056297, "grad_norm": 2.353123426437378, "learning_rate": 0.00015648000397680924, "loss": 1.4772, "step": 9206 }, { "epoch": 0.32972227693519796, "grad_norm": 2.0134799480438232, "learning_rate": 0.00015647043176901886, "loss": 1.6496, "step": 9207 }, { "epoch": 0.3297580890647662, "grad_norm": 1.255784034729004, "learning_rate": 0.00015646085880149162, "loss": 1.6315, "step": 9208 }, { "epoch": 0.3297939011943345, "grad_norm": 1.3342036008834839, "learning_rate": 0.00015645128507435637, "loss": 1.3743, "step": 9209 }, { "epoch": 0.3298297133239028, "grad_norm": 1.8134342432022095, "learning_rate": 0.00015644171058774192, "loss": 1.6786, "step": 9210 }, { "epoch": 0.3298655254534711, "grad_norm": 1.3972450494766235, "learning_rate": 0.000156432135341777, "loss": 1.6555, "step": 9211 }, { "epoch": 0.32990133758303936, "grad_norm": 1.5140109062194824, "learning_rate": 0.00015642255933659053, "loss": 1.5552, "step": 9212 }, { "epoch": 0.3299371497126077, "grad_norm": 1.5048795938491821, "learning_rate": 0.0001564129825723113, "loss": 1.4055, "step": 9213 }, { "epoch": 0.32997296184217595, "grad_norm": 2.151780605316162, "learning_rate": 0.00015640340504906818, "loss": 1.5771, "step": 9214 }, { "epoch": 0.3300087739717442, "grad_norm": 1.7445200681686401, "learning_rate": 0.00015639382676698997, "loss": 1.8844, "step": 9215 }, { "epoch": 0.3300445861013125, "grad_norm": 1.9518778324127197, "learning_rate": 0.00015638424772620554, "loss": 2.0622, "step": 9216 }, { "epoch": 0.3300803982308808, "grad_norm": 2.5693469047546387, "learning_rate": 0.00015637466792684383, "loss": 1.3957, "step": 9217 }, { "epoch": 0.3301162103604491, "grad_norm": 1.5530214309692383, "learning_rate": 0.00015636508736903366, "loss": 1.3895, "step": 9218 }, { "epoch": 0.33015202249001735, "grad_norm": 1.2154394388198853, "learning_rate": 0.00015635550605290396, "loss": 1.561, "step": 9219 }, { "epoch": 0.3301878346195857, "grad_norm": 1.5539181232452393, "learning_rate": 0.00015634592397858362, "loss": 1.2993, "step": 9220 }, { "epoch": 0.33022364674915394, "grad_norm": 2.4144694805145264, "learning_rate": 0.00015633634114620154, "loss": 2.0973, "step": 9221 }, { "epoch": 0.3302594588787222, "grad_norm": 1.9893107414245605, "learning_rate": 0.00015632675755588668, "loss": 2.0019, "step": 9222 }, { "epoch": 0.3302952710082905, "grad_norm": 1.4870593547821045, "learning_rate": 0.00015631717320776795, "loss": 1.1392, "step": 9223 }, { "epoch": 0.3303310831378588, "grad_norm": 1.4275274276733398, "learning_rate": 0.00015630758810197427, "loss": 1.6437, "step": 9224 }, { "epoch": 0.3303668952674271, "grad_norm": 1.9945303201675415, "learning_rate": 0.00015629800223863465, "loss": 1.5218, "step": 9225 }, { "epoch": 0.33040270739699534, "grad_norm": 1.4058603048324585, "learning_rate": 0.000156288415617878, "loss": 1.4086, "step": 9226 }, { "epoch": 0.33043851952656367, "grad_norm": 1.57683265209198, "learning_rate": 0.00015627882823983336, "loss": 1.6094, "step": 9227 }, { "epoch": 0.33047433165613194, "grad_norm": 3.029585361480713, "learning_rate": 0.00015626924010462968, "loss": 1.7547, "step": 9228 }, { "epoch": 0.3305101437857002, "grad_norm": 1.5655510425567627, "learning_rate": 0.00015625965121239592, "loss": 1.8381, "step": 9229 }, { "epoch": 0.3305459559152685, "grad_norm": 2.5353496074676514, "learning_rate": 0.00015625006156326117, "loss": 1.8627, "step": 9230 }, { "epoch": 0.3305817680448368, "grad_norm": 1.9974932670593262, "learning_rate": 0.00015624047115735435, "loss": 1.605, "step": 9231 }, { "epoch": 0.33061758017440507, "grad_norm": 2.7682440280914307, "learning_rate": 0.00015623087999480458, "loss": 1.9736, "step": 9232 }, { "epoch": 0.33065339230397334, "grad_norm": 1.4476059675216675, "learning_rate": 0.00015622128807574081, "loss": 1.7133, "step": 9233 }, { "epoch": 0.33068920443354166, "grad_norm": 1.8865472078323364, "learning_rate": 0.00015621169540029216, "loss": 1.4769, "step": 9234 }, { "epoch": 0.33072501656310993, "grad_norm": 2.4696617126464844, "learning_rate": 0.00015620210196858763, "loss": 1.5348, "step": 9235 }, { "epoch": 0.3307608286926782, "grad_norm": 1.3538154363632202, "learning_rate": 0.00015619250778075634, "loss": 1.5639, "step": 9236 }, { "epoch": 0.33079664082224647, "grad_norm": 1.380228042602539, "learning_rate": 0.00015618291283692735, "loss": 1.4208, "step": 9237 }, { "epoch": 0.3308324529518148, "grad_norm": 1.5593613386154175, "learning_rate": 0.0001561733171372297, "loss": 1.4575, "step": 9238 }, { "epoch": 0.33086826508138306, "grad_norm": 1.0856677293777466, "learning_rate": 0.00015616372068179255, "loss": 1.287, "step": 9239 }, { "epoch": 0.33090407721095133, "grad_norm": 1.7727956771850586, "learning_rate": 0.00015615412347074498, "loss": 1.4814, "step": 9240 }, { "epoch": 0.33093988934051966, "grad_norm": 2.7936644554138184, "learning_rate": 0.0001561445255042161, "loss": 1.4023, "step": 9241 }, { "epoch": 0.3309757014700879, "grad_norm": 1.5259954929351807, "learning_rate": 0.00015613492678233509, "loss": 1.4711, "step": 9242 }, { "epoch": 0.3310115135996562, "grad_norm": 1.6369067430496216, "learning_rate": 0.000156125327305231, "loss": 1.6207, "step": 9243 }, { "epoch": 0.33104732572922446, "grad_norm": 1.5903338193893433, "learning_rate": 0.00015611572707303307, "loss": 1.5504, "step": 9244 }, { "epoch": 0.3310831378587928, "grad_norm": 1.5647993087768555, "learning_rate": 0.00015610612608587035, "loss": 1.722, "step": 9245 }, { "epoch": 0.33111894998836106, "grad_norm": 1.9212912321090698, "learning_rate": 0.00015609652434387216, "loss": 1.2589, "step": 9246 }, { "epoch": 0.3311547621179293, "grad_norm": 1.876175045967102, "learning_rate": 0.00015608692184716753, "loss": 1.5151, "step": 9247 }, { "epoch": 0.33119057424749765, "grad_norm": 2.0057926177978516, "learning_rate": 0.00015607731859588575, "loss": 1.4391, "step": 9248 }, { "epoch": 0.3312263863770659, "grad_norm": 1.4844675064086914, "learning_rate": 0.00015606771459015598, "loss": 1.8257, "step": 9249 }, { "epoch": 0.3312621985066342, "grad_norm": 1.5540400743484497, "learning_rate": 0.00015605810983010743, "loss": 1.2488, "step": 9250 }, { "epoch": 0.33129801063620246, "grad_norm": 2.0735278129577637, "learning_rate": 0.0001560485043158693, "loss": 1.4661, "step": 9251 }, { "epoch": 0.3313338227657708, "grad_norm": 1.5882608890533447, "learning_rate": 0.00015603889804757085, "loss": 1.4818, "step": 9252 }, { "epoch": 0.33136963489533905, "grad_norm": 1.3272738456726074, "learning_rate": 0.00015602929102534132, "loss": 1.6939, "step": 9253 }, { "epoch": 0.3314054470249073, "grad_norm": 1.2422311305999756, "learning_rate": 0.00015601968324930997, "loss": 1.3206, "step": 9254 }, { "epoch": 0.33144125915447564, "grad_norm": 1.8080556392669678, "learning_rate": 0.000156010074719606, "loss": 1.2622, "step": 9255 }, { "epoch": 0.3314770712840439, "grad_norm": 1.7431944608688354, "learning_rate": 0.00015600046543635875, "loss": 1.4804, "step": 9256 }, { "epoch": 0.3315128834136122, "grad_norm": 1.3665114641189575, "learning_rate": 0.0001559908553996975, "loss": 1.6605, "step": 9257 }, { "epoch": 0.33154869554318045, "grad_norm": 1.7952216863632202, "learning_rate": 0.00015598124460975148, "loss": 1.6897, "step": 9258 }, { "epoch": 0.3315845076727488, "grad_norm": 1.934351921081543, "learning_rate": 0.00015597163306665002, "loss": 1.2434, "step": 9259 }, { "epoch": 0.33162031980231704, "grad_norm": 1.282073974609375, "learning_rate": 0.00015596202077052245, "loss": 1.1964, "step": 9260 }, { "epoch": 0.3316561319318853, "grad_norm": 1.3141311407089233, "learning_rate": 0.00015595240772149803, "loss": 1.2027, "step": 9261 }, { "epoch": 0.33169194406145364, "grad_norm": 1.557449221611023, "learning_rate": 0.0001559427939197062, "loss": 1.7988, "step": 9262 }, { "epoch": 0.3317277561910219, "grad_norm": 1.4467054605484009, "learning_rate": 0.0001559331793652762, "loss": 1.2495, "step": 9263 }, { "epoch": 0.3317635683205902, "grad_norm": 1.3926703929901123, "learning_rate": 0.00015592356405833745, "loss": 1.5037, "step": 9264 }, { "epoch": 0.33179938045015844, "grad_norm": 2.078423261642456, "learning_rate": 0.00015591394799901927, "loss": 1.6851, "step": 9265 }, { "epoch": 0.33183519257972677, "grad_norm": 2.431959867477417, "learning_rate": 0.00015590433118745106, "loss": 1.8476, "step": 9266 }, { "epoch": 0.33187100470929504, "grad_norm": 1.8334934711456299, "learning_rate": 0.00015589471362376217, "loss": 1.8419, "step": 9267 }, { "epoch": 0.3319068168388633, "grad_norm": 1.5225297212600708, "learning_rate": 0.00015588509530808199, "loss": 1.9179, "step": 9268 }, { "epoch": 0.33194262896843163, "grad_norm": 1.3365960121154785, "learning_rate": 0.00015587547624053993, "loss": 1.1894, "step": 9269 }, { "epoch": 0.3319784410979999, "grad_norm": 2.1068549156188965, "learning_rate": 0.00015586585642126543, "loss": 1.6613, "step": 9270 }, { "epoch": 0.33201425322756817, "grad_norm": 1.5305715799331665, "learning_rate": 0.00015585623585038792, "loss": 1.4709, "step": 9271 }, { "epoch": 0.33205006535713644, "grad_norm": 1.8630884885787964, "learning_rate": 0.00015584661452803676, "loss": 1.6649, "step": 9272 }, { "epoch": 0.33208587748670476, "grad_norm": 1.756103754043579, "learning_rate": 0.00015583699245434146, "loss": 1.5741, "step": 9273 }, { "epoch": 0.33212168961627303, "grad_norm": 1.6057642698287964, "learning_rate": 0.00015582736962943148, "loss": 1.6483, "step": 9274 }, { "epoch": 0.3321575017458413, "grad_norm": 1.2686374187469482, "learning_rate": 0.00015581774605343622, "loss": 1.4546, "step": 9275 }, { "epoch": 0.3321933138754096, "grad_norm": 1.5505086183547974, "learning_rate": 0.0001558081217264852, "loss": 1.6977, "step": 9276 }, { "epoch": 0.3322291260049779, "grad_norm": 1.148769736289978, "learning_rate": 0.00015579849664870788, "loss": 1.4565, "step": 9277 }, { "epoch": 0.33226493813454616, "grad_norm": 1.3341801166534424, "learning_rate": 0.00015578887082023373, "loss": 1.4761, "step": 9278 }, { "epoch": 0.33230075026411443, "grad_norm": 1.950057029724121, "learning_rate": 0.00015577924424119233, "loss": 1.4574, "step": 9279 }, { "epoch": 0.33233656239368276, "grad_norm": 1.5431779623031616, "learning_rate": 0.00015576961691171314, "loss": 1.4851, "step": 9280 }, { "epoch": 0.332372374523251, "grad_norm": 1.7088158130645752, "learning_rate": 0.0001557599888319257, "loss": 1.7068, "step": 9281 }, { "epoch": 0.3324081866528193, "grad_norm": 2.0593230724334717, "learning_rate": 0.00015575036000195952, "loss": 1.4745, "step": 9282 }, { "epoch": 0.3324439987823876, "grad_norm": 1.5324299335479736, "learning_rate": 0.00015574073042194417, "loss": 1.8138, "step": 9283 }, { "epoch": 0.3324798109119559, "grad_norm": 1.420212984085083, "learning_rate": 0.0001557311000920092, "loss": 1.5369, "step": 9284 }, { "epoch": 0.33251562304152416, "grad_norm": 2.050830364227295, "learning_rate": 0.00015572146901228414, "loss": 1.6046, "step": 9285 }, { "epoch": 0.3325514351710924, "grad_norm": 1.4355131387710571, "learning_rate": 0.0001557118371828986, "loss": 1.4598, "step": 9286 }, { "epoch": 0.33258724730066075, "grad_norm": 2.0514612197875977, "learning_rate": 0.00015570220460398216, "loss": 1.3749, "step": 9287 }, { "epoch": 0.332623059430229, "grad_norm": 1.370781421661377, "learning_rate": 0.00015569257127566441, "loss": 1.6945, "step": 9288 }, { "epoch": 0.3326588715597973, "grad_norm": 1.860186219215393, "learning_rate": 0.00015568293719807493, "loss": 1.5742, "step": 9289 }, { "epoch": 0.3326946836893656, "grad_norm": 1.4769200086593628, "learning_rate": 0.00015567330237134338, "loss": 1.6708, "step": 9290 }, { "epoch": 0.3327304958189339, "grad_norm": 1.4108153581619263, "learning_rate": 0.00015566366679559937, "loss": 1.5285, "step": 9291 }, { "epoch": 0.33276630794850215, "grad_norm": 1.5427703857421875, "learning_rate": 0.0001556540304709725, "loss": 1.3293, "step": 9292 }, { "epoch": 0.3328021200780704, "grad_norm": 1.8373152017593384, "learning_rate": 0.00015564439339759245, "loss": 1.6855, "step": 9293 }, { "epoch": 0.33283793220763874, "grad_norm": 2.347149133682251, "learning_rate": 0.00015563475557558887, "loss": 1.4346, "step": 9294 }, { "epoch": 0.332873744337207, "grad_norm": 2.9432506561279297, "learning_rate": 0.00015562511700509138, "loss": 1.3162, "step": 9295 }, { "epoch": 0.3329095564667753, "grad_norm": 2.5354115962982178, "learning_rate": 0.00015561547768622974, "loss": 1.7069, "step": 9296 }, { "epoch": 0.3329453685963436, "grad_norm": 1.4742679595947266, "learning_rate": 0.00015560583761913357, "loss": 1.2812, "step": 9297 }, { "epoch": 0.3329811807259119, "grad_norm": 1.7055310010910034, "learning_rate": 0.00015559619680393256, "loss": 1.54, "step": 9298 }, { "epoch": 0.33301699285548014, "grad_norm": 1.5904908180236816, "learning_rate": 0.00015558655524075646, "loss": 1.4334, "step": 9299 }, { "epoch": 0.3330528049850484, "grad_norm": 1.9072233438491821, "learning_rate": 0.00015557691292973494, "loss": 1.4061, "step": 9300 }, { "epoch": 0.33308861711461674, "grad_norm": 1.6825381517410278, "learning_rate": 0.0001555672698709978, "loss": 1.3886, "step": 9301 }, { "epoch": 0.333124429244185, "grad_norm": 1.6894252300262451, "learning_rate": 0.00015555762606467465, "loss": 1.4306, "step": 9302 }, { "epoch": 0.3331602413737533, "grad_norm": 1.9015343189239502, "learning_rate": 0.00015554798151089534, "loss": 1.3311, "step": 9303 }, { "epoch": 0.3331960535033216, "grad_norm": 2.412245512008667, "learning_rate": 0.00015553833620978957, "loss": 1.5272, "step": 9304 }, { "epoch": 0.33323186563288987, "grad_norm": 2.0586354732513428, "learning_rate": 0.00015552869016148714, "loss": 1.8866, "step": 9305 }, { "epoch": 0.33326767776245814, "grad_norm": 1.8574638366699219, "learning_rate": 0.0001555190433661178, "loss": 1.4357, "step": 9306 }, { "epoch": 0.3333034898920264, "grad_norm": 1.5744131803512573, "learning_rate": 0.00015550939582381135, "loss": 1.3504, "step": 9307 }, { "epoch": 0.33333930202159473, "grad_norm": 1.7387135028839111, "learning_rate": 0.00015549974753469763, "loss": 1.5314, "step": 9308 }, { "epoch": 0.333375114151163, "grad_norm": 1.4193501472473145, "learning_rate": 0.00015549009849890634, "loss": 1.3785, "step": 9309 }, { "epoch": 0.33341092628073127, "grad_norm": 1.4320520162582397, "learning_rate": 0.0001554804487165674, "loss": 1.7415, "step": 9310 }, { "epoch": 0.3334467384102996, "grad_norm": 2.080427408218384, "learning_rate": 0.00015547079818781055, "loss": 1.4669, "step": 9311 }, { "epoch": 0.33348255053986786, "grad_norm": 2.3720574378967285, "learning_rate": 0.00015546114691276567, "loss": 1.5753, "step": 9312 }, { "epoch": 0.33351836266943613, "grad_norm": 1.8366647958755493, "learning_rate": 0.0001554514948915626, "loss": 1.5028, "step": 9313 }, { "epoch": 0.3335541747990044, "grad_norm": 1.6955592632293701, "learning_rate": 0.00015544184212433116, "loss": 1.5171, "step": 9314 }, { "epoch": 0.3335899869285727, "grad_norm": 2.041065216064453, "learning_rate": 0.00015543218861120125, "loss": 1.2655, "step": 9315 }, { "epoch": 0.333625799058141, "grad_norm": 1.9579260349273682, "learning_rate": 0.00015542253435230278, "loss": 1.4517, "step": 9316 }, { "epoch": 0.33366161118770926, "grad_norm": 1.472667932510376, "learning_rate": 0.0001554128793477656, "loss": 1.2546, "step": 9317 }, { "epoch": 0.3336974233172776, "grad_norm": 1.5596768856048584, "learning_rate": 0.0001554032235977196, "loss": 1.5515, "step": 9318 }, { "epoch": 0.33373323544684586, "grad_norm": 1.527860164642334, "learning_rate": 0.0001553935671022947, "loss": 1.3611, "step": 9319 }, { "epoch": 0.3337690475764141, "grad_norm": 1.6964343786239624, "learning_rate": 0.00015538390986162082, "loss": 1.5114, "step": 9320 }, { "epoch": 0.3338048597059824, "grad_norm": 1.6963163614273071, "learning_rate": 0.00015537425187582785, "loss": 1.7816, "step": 9321 }, { "epoch": 0.3338406718355507, "grad_norm": 1.6652700901031494, "learning_rate": 0.00015536459314504573, "loss": 1.8709, "step": 9322 }, { "epoch": 0.333876483965119, "grad_norm": 1.7200714349746704, "learning_rate": 0.00015535493366940442, "loss": 1.7631, "step": 9323 }, { "epoch": 0.33391229609468726, "grad_norm": 2.2434933185577393, "learning_rate": 0.0001553452734490339, "loss": 1.6587, "step": 9324 }, { "epoch": 0.3339481082242556, "grad_norm": 1.4792317152023315, "learning_rate": 0.00015533561248406413, "loss": 1.5377, "step": 9325 }, { "epoch": 0.33398392035382385, "grad_norm": 1.9175893068313599, "learning_rate": 0.00015532595077462507, "loss": 1.6146, "step": 9326 }, { "epoch": 0.3340197324833921, "grad_norm": 3.432957649230957, "learning_rate": 0.0001553162883208467, "loss": 1.5732, "step": 9327 }, { "epoch": 0.3340555446129604, "grad_norm": 1.9333349466323853, "learning_rate": 0.00015530662512285902, "loss": 1.211, "step": 9328 }, { "epoch": 0.3340913567425287, "grad_norm": 2.2683115005493164, "learning_rate": 0.00015529696118079205, "loss": 1.5071, "step": 9329 }, { "epoch": 0.334127168872097, "grad_norm": 1.9926609992980957, "learning_rate": 0.00015528729649477574, "loss": 1.571, "step": 9330 }, { "epoch": 0.33416298100166525, "grad_norm": 1.6616355180740356, "learning_rate": 0.00015527763106494024, "loss": 1.4256, "step": 9331 }, { "epoch": 0.3341987931312336, "grad_norm": 1.9569220542907715, "learning_rate": 0.0001552679648914155, "loss": 1.8484, "step": 9332 }, { "epoch": 0.33423460526080184, "grad_norm": 1.5473068952560425, "learning_rate": 0.00015525829797433157, "loss": 1.405, "step": 9333 }, { "epoch": 0.3342704173903701, "grad_norm": 1.8051518201828003, "learning_rate": 0.00015524863031381853, "loss": 1.2997, "step": 9334 }, { "epoch": 0.3343062295199384, "grad_norm": 1.7402265071868896, "learning_rate": 0.00015523896191000643, "loss": 1.7362, "step": 9335 }, { "epoch": 0.3343420416495067, "grad_norm": 1.6411949396133423, "learning_rate": 0.00015522929276302536, "loss": 1.3214, "step": 9336 }, { "epoch": 0.334377853779075, "grad_norm": 1.3993909358978271, "learning_rate": 0.0001552196228730054, "loss": 1.3546, "step": 9337 }, { "epoch": 0.33441366590864324, "grad_norm": 2.0956623554229736, "learning_rate": 0.00015520995224007662, "loss": 1.5105, "step": 9338 }, { "epoch": 0.33444947803821157, "grad_norm": 1.8981349468231201, "learning_rate": 0.00015520028086436915, "loss": 1.4384, "step": 9339 }, { "epoch": 0.33448529016777984, "grad_norm": 2.153280258178711, "learning_rate": 0.00015519060874601313, "loss": 1.3638, "step": 9340 }, { "epoch": 0.3345211022973481, "grad_norm": 1.4576733112335205, "learning_rate": 0.00015518093588513863, "loss": 1.5844, "step": 9341 }, { "epoch": 0.3345569144269164, "grad_norm": 1.9353009462356567, "learning_rate": 0.0001551712622818758, "loss": 1.6606, "step": 9342 }, { "epoch": 0.3345927265564847, "grad_norm": 1.6368129253387451, "learning_rate": 0.00015516158793635486, "loss": 1.2273, "step": 9343 }, { "epoch": 0.33462853868605297, "grad_norm": 1.8436633348464966, "learning_rate": 0.00015515191284870588, "loss": 1.771, "step": 9344 }, { "epoch": 0.33466435081562124, "grad_norm": 1.6525850296020508, "learning_rate": 0.00015514223701905904, "loss": 1.375, "step": 9345 }, { "epoch": 0.33470016294518956, "grad_norm": 1.5455073118209839, "learning_rate": 0.00015513256044754457, "loss": 1.6118, "step": 9346 }, { "epoch": 0.33473597507475783, "grad_norm": 1.7292460203170776, "learning_rate": 0.00015512288313429258, "loss": 1.6382, "step": 9347 }, { "epoch": 0.3347717872043261, "grad_norm": 2.00976824760437, "learning_rate": 0.0001551132050794333, "loss": 1.4821, "step": 9348 }, { "epoch": 0.33480759933389437, "grad_norm": 2.144354820251465, "learning_rate": 0.00015510352628309693, "loss": 1.8286, "step": 9349 }, { "epoch": 0.3348434114634627, "grad_norm": 1.8691902160644531, "learning_rate": 0.00015509384674541372, "loss": 1.5248, "step": 9350 }, { "epoch": 0.33487922359303096, "grad_norm": 1.6684008836746216, "learning_rate": 0.00015508416646651385, "loss": 1.5576, "step": 9351 }, { "epoch": 0.33491503572259923, "grad_norm": 1.8261315822601318, "learning_rate": 0.0001550744854465276, "loss": 1.704, "step": 9352 }, { "epoch": 0.33495084785216755, "grad_norm": 2.0527100563049316, "learning_rate": 0.00015506480368558516, "loss": 1.7708, "step": 9353 }, { "epoch": 0.3349866599817358, "grad_norm": 1.6841418743133545, "learning_rate": 0.00015505512118381683, "loss": 1.365, "step": 9354 }, { "epoch": 0.3350224721113041, "grad_norm": 1.531112790107727, "learning_rate": 0.00015504543794135284, "loss": 1.527, "step": 9355 }, { "epoch": 0.33505828424087236, "grad_norm": 1.4720302820205688, "learning_rate": 0.00015503575395832352, "loss": 1.4721, "step": 9356 }, { "epoch": 0.3350940963704407, "grad_norm": 2.4081616401672363, "learning_rate": 0.00015502606923485906, "loss": 1.4112, "step": 9357 }, { "epoch": 0.33512990850000896, "grad_norm": 1.4726104736328125, "learning_rate": 0.00015501638377108987, "loss": 1.4118, "step": 9358 }, { "epoch": 0.3351657206295772, "grad_norm": 1.6351170539855957, "learning_rate": 0.00015500669756714618, "loss": 1.341, "step": 9359 }, { "epoch": 0.33520153275914555, "grad_norm": 2.084728717803955, "learning_rate": 0.0001549970106231583, "loss": 1.457, "step": 9360 }, { "epoch": 0.3352373448887138, "grad_norm": 1.7264491319656372, "learning_rate": 0.00015498732293925667, "loss": 1.5515, "step": 9361 }, { "epoch": 0.3352731570182821, "grad_norm": 1.6600877046585083, "learning_rate": 0.00015497763451557148, "loss": 1.5989, "step": 9362 }, { "epoch": 0.33530896914785036, "grad_norm": 1.4888222217559814, "learning_rate": 0.00015496794535223315, "loss": 1.0686, "step": 9363 }, { "epoch": 0.3353447812774187, "grad_norm": 2.209500312805176, "learning_rate": 0.000154958255449372, "loss": 1.722, "step": 9364 }, { "epoch": 0.33538059340698695, "grad_norm": 1.8588663339614868, "learning_rate": 0.00015494856480711844, "loss": 1.3165, "step": 9365 }, { "epoch": 0.3354164055365552, "grad_norm": 2.067023754119873, "learning_rate": 0.0001549388734256028, "loss": 1.8567, "step": 9366 }, { "epoch": 0.33545221766612354, "grad_norm": 1.5753854513168335, "learning_rate": 0.00015492918130495547, "loss": 1.2895, "step": 9367 }, { "epoch": 0.3354880297956918, "grad_norm": 1.8132989406585693, "learning_rate": 0.0001549194884453069, "loss": 1.4248, "step": 9368 }, { "epoch": 0.3355238419252601, "grad_norm": 1.5888770818710327, "learning_rate": 0.00015490979484678743, "loss": 1.2666, "step": 9369 }, { "epoch": 0.33555965405482835, "grad_norm": 2.0815927982330322, "learning_rate": 0.0001549001005095275, "loss": 1.6768, "step": 9370 }, { "epoch": 0.3355954661843967, "grad_norm": 1.6123459339141846, "learning_rate": 0.00015489040543365754, "loss": 1.4473, "step": 9371 }, { "epoch": 0.33563127831396494, "grad_norm": 1.461475133895874, "learning_rate": 0.00015488070961930796, "loss": 1.4638, "step": 9372 }, { "epoch": 0.3356670904435332, "grad_norm": 1.3855684995651245, "learning_rate": 0.00015487101306660924, "loss": 1.3953, "step": 9373 }, { "epoch": 0.33570290257310154, "grad_norm": 2.608703851699829, "learning_rate": 0.00015486131577569182, "loss": 1.5456, "step": 9374 }, { "epoch": 0.3357387147026698, "grad_norm": 1.4365531206130981, "learning_rate": 0.00015485161774668615, "loss": 1.5599, "step": 9375 }, { "epoch": 0.3357745268322381, "grad_norm": 1.4558398723602295, "learning_rate": 0.00015484191897972274, "loss": 1.4977, "step": 9376 }, { "epoch": 0.33581033896180634, "grad_norm": 1.7082833051681519, "learning_rate": 0.000154832219474932, "loss": 1.3407, "step": 9377 }, { "epoch": 0.33584615109137467, "grad_norm": 1.5729615688323975, "learning_rate": 0.00015482251923244452, "loss": 1.4501, "step": 9378 }, { "epoch": 0.33588196322094294, "grad_norm": 1.2978506088256836, "learning_rate": 0.00015481281825239072, "loss": 1.5467, "step": 9379 }, { "epoch": 0.3359177753505112, "grad_norm": 1.3954700231552124, "learning_rate": 0.00015480311653490124, "loss": 1.4613, "step": 9380 }, { "epoch": 0.33595358748007953, "grad_norm": 1.4414048194885254, "learning_rate": 0.00015479341408010643, "loss": 1.4413, "step": 9381 }, { "epoch": 0.3359893996096478, "grad_norm": 1.9647263288497925, "learning_rate": 0.00015478371088813696, "loss": 1.8244, "step": 9382 }, { "epoch": 0.33602521173921607, "grad_norm": 1.619997262954712, "learning_rate": 0.0001547740069591233, "loss": 1.6385, "step": 9383 }, { "epoch": 0.33606102386878434, "grad_norm": 1.9023948907852173, "learning_rate": 0.00015476430229319603, "loss": 1.5266, "step": 9384 }, { "epoch": 0.33609683599835266, "grad_norm": 1.7760009765625, "learning_rate": 0.00015475459689048572, "loss": 1.5373, "step": 9385 }, { "epoch": 0.33613264812792093, "grad_norm": 1.5633459091186523, "learning_rate": 0.00015474489075112296, "loss": 1.4418, "step": 9386 }, { "epoch": 0.3361684602574892, "grad_norm": 1.4270317554473877, "learning_rate": 0.00015473518387523825, "loss": 1.5547, "step": 9387 }, { "epoch": 0.3362042723870575, "grad_norm": 2.286064386367798, "learning_rate": 0.0001547254762629623, "loss": 1.5236, "step": 9388 }, { "epoch": 0.3362400845166258, "grad_norm": 1.7473195791244507, "learning_rate": 0.00015471576791442564, "loss": 1.5732, "step": 9389 }, { "epoch": 0.33627589664619406, "grad_norm": 1.81753408908844, "learning_rate": 0.00015470605882975891, "loss": 1.4656, "step": 9390 }, { "epoch": 0.33631170877576233, "grad_norm": 1.6820625066757202, "learning_rate": 0.00015469634900909271, "loss": 1.4767, "step": 9391 }, { "epoch": 0.33634752090533065, "grad_norm": 1.315958023071289, "learning_rate": 0.00015468663845255768, "loss": 1.5724, "step": 9392 }, { "epoch": 0.3363833330348989, "grad_norm": 2.7729685306549072, "learning_rate": 0.0001546769271602845, "loss": 1.4956, "step": 9393 }, { "epoch": 0.3364191451644672, "grad_norm": 2.246842622756958, "learning_rate": 0.0001546672151324038, "loss": 1.6252, "step": 9394 }, { "epoch": 0.3364549572940355, "grad_norm": 1.5574842691421509, "learning_rate": 0.0001546575023690462, "loss": 1.6173, "step": 9395 }, { "epoch": 0.3364907694236038, "grad_norm": 1.6773743629455566, "learning_rate": 0.00015464778887034242, "loss": 1.4211, "step": 9396 }, { "epoch": 0.33652658155317206, "grad_norm": 1.4379688501358032, "learning_rate": 0.0001546380746364231, "loss": 1.5853, "step": 9397 }, { "epoch": 0.3365623936827403, "grad_norm": 1.9088571071624756, "learning_rate": 0.00015462835966741903, "loss": 0.9463, "step": 9398 }, { "epoch": 0.33659820581230865, "grad_norm": 1.3112964630126953, "learning_rate": 0.0001546186439634608, "loss": 1.6045, "step": 9399 }, { "epoch": 0.3366340179418769, "grad_norm": 1.9358813762664795, "learning_rate": 0.0001546089275246792, "loss": 1.6525, "step": 9400 }, { "epoch": 0.3366698300714452, "grad_norm": 1.9341341257095337, "learning_rate": 0.00015459921035120488, "loss": 1.5339, "step": 9401 }, { "epoch": 0.3367056422010135, "grad_norm": 1.2685867547988892, "learning_rate": 0.00015458949244316866, "loss": 1.5571, "step": 9402 }, { "epoch": 0.3367414543305818, "grad_norm": 1.3416786193847656, "learning_rate": 0.00015457977380070118, "loss": 1.4485, "step": 9403 }, { "epoch": 0.33677726646015005, "grad_norm": 2.085193157196045, "learning_rate": 0.00015457005442393327, "loss": 1.656, "step": 9404 }, { "epoch": 0.3368130785897183, "grad_norm": 1.7671416997909546, "learning_rate": 0.00015456033431299567, "loss": 1.3032, "step": 9405 }, { "epoch": 0.33684889071928664, "grad_norm": 1.6520533561706543, "learning_rate": 0.00015455061346801916, "loss": 1.3252, "step": 9406 }, { "epoch": 0.3368847028488549, "grad_norm": 2.0870721340179443, "learning_rate": 0.00015454089188913454, "loss": 1.3519, "step": 9407 }, { "epoch": 0.3369205149784232, "grad_norm": 2.2233269214630127, "learning_rate": 0.00015453116957647254, "loss": 1.4443, "step": 9408 }, { "epoch": 0.33695632710799145, "grad_norm": 1.454346776008606, "learning_rate": 0.00015452144653016397, "loss": 1.5947, "step": 9409 }, { "epoch": 0.3369921392375598, "grad_norm": 1.5266408920288086, "learning_rate": 0.0001545117227503397, "loss": 1.456, "step": 9410 }, { "epoch": 0.33702795136712804, "grad_norm": 1.5770138502120972, "learning_rate": 0.00015450199823713047, "loss": 1.4124, "step": 9411 }, { "epoch": 0.3370637634966963, "grad_norm": 1.7103294134140015, "learning_rate": 0.0001544922729906672, "loss": 1.4488, "step": 9412 }, { "epoch": 0.33709957562626464, "grad_norm": 1.5208357572555542, "learning_rate": 0.00015448254701108067, "loss": 1.437, "step": 9413 }, { "epoch": 0.3371353877558329, "grad_norm": 1.9375684261322021, "learning_rate": 0.00015447282029850174, "loss": 1.2997, "step": 9414 }, { "epoch": 0.3371711998854012, "grad_norm": 1.5922077894210815, "learning_rate": 0.00015446309285306131, "loss": 1.5067, "step": 9415 }, { "epoch": 0.33720701201496944, "grad_norm": 1.5397335290908813, "learning_rate": 0.0001544533646748902, "loss": 1.3838, "step": 9416 }, { "epoch": 0.33724282414453777, "grad_norm": 1.709815263748169, "learning_rate": 0.00015444363576411929, "loss": 1.3891, "step": 9417 }, { "epoch": 0.33727863627410604, "grad_norm": 1.6586835384368896, "learning_rate": 0.00015443390612087952, "loss": 1.3357, "step": 9418 }, { "epoch": 0.3373144484036743, "grad_norm": 1.817344069480896, "learning_rate": 0.00015442417574530173, "loss": 1.5462, "step": 9419 }, { "epoch": 0.33735026053324263, "grad_norm": 1.4759312868118286, "learning_rate": 0.00015441444463751687, "loss": 1.6142, "step": 9420 }, { "epoch": 0.3373860726628109, "grad_norm": 1.9167733192443848, "learning_rate": 0.00015440471279765583, "loss": 1.5566, "step": 9421 }, { "epoch": 0.33742188479237917, "grad_norm": 1.3484073877334595, "learning_rate": 0.00015439498022584957, "loss": 1.4768, "step": 9422 }, { "epoch": 0.33745769692194744, "grad_norm": 1.864970088005066, "learning_rate": 0.00015438524692222902, "loss": 1.5269, "step": 9423 }, { "epoch": 0.33749350905151576, "grad_norm": 1.4716098308563232, "learning_rate": 0.00015437551288692512, "loss": 1.452, "step": 9424 }, { "epoch": 0.33752932118108403, "grad_norm": 2.067917823791504, "learning_rate": 0.00015436577812006884, "loss": 1.4989, "step": 9425 }, { "epoch": 0.3375651333106523, "grad_norm": 2.316983938217163, "learning_rate": 0.00015435604262179116, "loss": 1.2659, "step": 9426 }, { "epoch": 0.3376009454402206, "grad_norm": 2.3151962757110596, "learning_rate": 0.000154346306392223, "loss": 1.5562, "step": 9427 }, { "epoch": 0.3376367575697889, "grad_norm": 1.9097533226013184, "learning_rate": 0.00015433656943149543, "loss": 1.7194, "step": 9428 }, { "epoch": 0.33767256969935716, "grad_norm": 1.6860368251800537, "learning_rate": 0.00015432683173973935, "loss": 1.7439, "step": 9429 }, { "epoch": 0.33770838182892543, "grad_norm": 1.6204484701156616, "learning_rate": 0.0001543170933170859, "loss": 1.4332, "step": 9430 }, { "epoch": 0.33774419395849375, "grad_norm": 1.807880163192749, "learning_rate": 0.00015430735416366596, "loss": 1.7937, "step": 9431 }, { "epoch": 0.337780006088062, "grad_norm": 1.4650651216506958, "learning_rate": 0.00015429761427961065, "loss": 1.456, "step": 9432 }, { "epoch": 0.3378158182176303, "grad_norm": 1.7693088054656982, "learning_rate": 0.00015428787366505094, "loss": 1.6138, "step": 9433 }, { "epoch": 0.3378516303471986, "grad_norm": 2.2674636840820312, "learning_rate": 0.00015427813232011799, "loss": 1.6677, "step": 9434 }, { "epoch": 0.3378874424767669, "grad_norm": 1.9544172286987305, "learning_rate": 0.00015426839024494272, "loss": 1.6052, "step": 9435 }, { "epoch": 0.33792325460633515, "grad_norm": 2.344970703125, "learning_rate": 0.0001542586474396563, "loss": 1.4323, "step": 9436 }, { "epoch": 0.3379590667359034, "grad_norm": 2.032620906829834, "learning_rate": 0.00015424890390438974, "loss": 1.5969, "step": 9437 }, { "epoch": 0.33799487886547175, "grad_norm": 1.735715627670288, "learning_rate": 0.00015423915963927418, "loss": 1.4165, "step": 9438 }, { "epoch": 0.33803069099504, "grad_norm": 1.2005674839019775, "learning_rate": 0.00015422941464444064, "loss": 1.569, "step": 9439 }, { "epoch": 0.3380665031246083, "grad_norm": 1.4805797338485718, "learning_rate": 0.00015421966892002032, "loss": 1.5567, "step": 9440 }, { "epoch": 0.3381023152541766, "grad_norm": 1.743964672088623, "learning_rate": 0.00015420992246614428, "loss": 1.5592, "step": 9441 }, { "epoch": 0.3381381273837449, "grad_norm": 1.772685170173645, "learning_rate": 0.00015420017528294368, "loss": 1.4956, "step": 9442 }, { "epoch": 0.33817393951331315, "grad_norm": 1.8625121116638184, "learning_rate": 0.00015419042737054963, "loss": 1.8012, "step": 9443 }, { "epoch": 0.3382097516428814, "grad_norm": 1.4339003562927246, "learning_rate": 0.00015418067872909326, "loss": 1.5661, "step": 9444 }, { "epoch": 0.33824556377244974, "grad_norm": 1.6561118364334106, "learning_rate": 0.00015417092935870574, "loss": 1.7139, "step": 9445 }, { "epoch": 0.338281375902018, "grad_norm": 1.8280638456344604, "learning_rate": 0.00015416117925951827, "loss": 1.143, "step": 9446 }, { "epoch": 0.3383171880315863, "grad_norm": 1.4793591499328613, "learning_rate": 0.000154151428431662, "loss": 1.422, "step": 9447 }, { "epoch": 0.3383530001611546, "grad_norm": 1.537424921989441, "learning_rate": 0.00015414167687526805, "loss": 1.3919, "step": 9448 }, { "epoch": 0.3383888122907229, "grad_norm": 2.0304996967315674, "learning_rate": 0.00015413192459046772, "loss": 1.3456, "step": 9449 }, { "epoch": 0.33842462442029114, "grad_norm": 1.8802131414413452, "learning_rate": 0.00015412217157739216, "loss": 1.2977, "step": 9450 }, { "epoch": 0.3384604365498594, "grad_norm": 1.9226833581924438, "learning_rate": 0.00015411241783617262, "loss": 1.4789, "step": 9451 }, { "epoch": 0.33849624867942774, "grad_norm": 1.7963732481002808, "learning_rate": 0.0001541026633669403, "loss": 1.3647, "step": 9452 }, { "epoch": 0.338532060808996, "grad_norm": 2.629523515701294, "learning_rate": 0.0001540929081698264, "loss": 1.3783, "step": 9453 }, { "epoch": 0.3385678729385643, "grad_norm": 1.9142543077468872, "learning_rate": 0.00015408315224496222, "loss": 1.5034, "step": 9454 }, { "epoch": 0.3386036850681326, "grad_norm": 1.6466937065124512, "learning_rate": 0.00015407339559247895, "loss": 1.5608, "step": 9455 }, { "epoch": 0.33863949719770087, "grad_norm": 1.5068143606185913, "learning_rate": 0.00015406363821250793, "loss": 1.5739, "step": 9456 }, { "epoch": 0.33867530932726914, "grad_norm": 1.6713124513626099, "learning_rate": 0.00015405388010518038, "loss": 1.9615, "step": 9457 }, { "epoch": 0.3387111214568374, "grad_norm": 2.409848928451538, "learning_rate": 0.00015404412127062762, "loss": 1.7276, "step": 9458 }, { "epoch": 0.33874693358640573, "grad_norm": 1.9540921449661255, "learning_rate": 0.00015403436170898088, "loss": 1.6689, "step": 9459 }, { "epoch": 0.338782745715974, "grad_norm": 1.4777239561080933, "learning_rate": 0.00015402460142037154, "loss": 1.4505, "step": 9460 }, { "epoch": 0.33881855784554227, "grad_norm": 2.009676694869995, "learning_rate": 0.00015401484040493085, "loss": 1.6446, "step": 9461 }, { "epoch": 0.3388543699751106, "grad_norm": 1.903566598892212, "learning_rate": 0.00015400507866279018, "loss": 1.5648, "step": 9462 }, { "epoch": 0.33889018210467886, "grad_norm": 1.4451779127120972, "learning_rate": 0.0001539953161940808, "loss": 1.6952, "step": 9463 }, { "epoch": 0.33892599423424713, "grad_norm": 2.141256332397461, "learning_rate": 0.00015398555299893412, "loss": 1.4004, "step": 9464 }, { "epoch": 0.3389618063638154, "grad_norm": 1.5447112321853638, "learning_rate": 0.00015397578907748146, "loss": 1.6946, "step": 9465 }, { "epoch": 0.3389976184933837, "grad_norm": 1.8307271003723145, "learning_rate": 0.00015396602442985417, "loss": 1.4, "step": 9466 }, { "epoch": 0.339033430622952, "grad_norm": 1.5202934741973877, "learning_rate": 0.00015395625905618364, "loss": 1.52, "step": 9467 }, { "epoch": 0.33906924275252026, "grad_norm": 1.4197298288345337, "learning_rate": 0.00015394649295660123, "loss": 1.6863, "step": 9468 }, { "epoch": 0.3391050548820886, "grad_norm": 1.4701685905456543, "learning_rate": 0.00015393672613123836, "loss": 1.3284, "step": 9469 }, { "epoch": 0.33914086701165685, "grad_norm": 1.5090365409851074, "learning_rate": 0.0001539269585802264, "loss": 1.5528, "step": 9470 }, { "epoch": 0.3391766791412251, "grad_norm": 1.5357013940811157, "learning_rate": 0.0001539171903036968, "loss": 1.5706, "step": 9471 }, { "epoch": 0.3392124912707934, "grad_norm": 1.3371944427490234, "learning_rate": 0.0001539074213017809, "loss": 1.2997, "step": 9472 }, { "epoch": 0.3392483034003617, "grad_norm": 1.5634480714797974, "learning_rate": 0.00015389765157461022, "loss": 1.2902, "step": 9473 }, { "epoch": 0.33928411552993, "grad_norm": 1.4515788555145264, "learning_rate": 0.00015388788112231615, "loss": 1.2752, "step": 9474 }, { "epoch": 0.33931992765949825, "grad_norm": 1.9081002473831177, "learning_rate": 0.00015387810994503016, "loss": 1.4576, "step": 9475 }, { "epoch": 0.3393557397890666, "grad_norm": 1.6875176429748535, "learning_rate": 0.0001538683380428837, "loss": 1.1983, "step": 9476 }, { "epoch": 0.33939155191863485, "grad_norm": 2.4767041206359863, "learning_rate": 0.00015385856541600825, "loss": 1.47, "step": 9477 }, { "epoch": 0.3394273640482031, "grad_norm": 1.4980638027191162, "learning_rate": 0.00015384879206453524, "loss": 1.4585, "step": 9478 }, { "epoch": 0.3394631761777714, "grad_norm": 2.4069361686706543, "learning_rate": 0.00015383901798859622, "loss": 1.485, "step": 9479 }, { "epoch": 0.3394989883073397, "grad_norm": 1.3716156482696533, "learning_rate": 0.00015382924318832264, "loss": 1.4037, "step": 9480 }, { "epoch": 0.339534800436908, "grad_norm": 1.2617043256759644, "learning_rate": 0.00015381946766384602, "loss": 1.4763, "step": 9481 }, { "epoch": 0.33957061256647625, "grad_norm": 1.9273022413253784, "learning_rate": 0.0001538096914152979, "loss": 1.676, "step": 9482 }, { "epoch": 0.3396064246960446, "grad_norm": 1.8018248081207275, "learning_rate": 0.00015379991444280979, "loss": 1.6164, "step": 9483 }, { "epoch": 0.33964223682561284, "grad_norm": 1.717280387878418, "learning_rate": 0.00015379013674651323, "loss": 1.6003, "step": 9484 }, { "epoch": 0.3396780489551811, "grad_norm": 1.4812376499176025, "learning_rate": 0.00015378035832653975, "loss": 1.7848, "step": 9485 }, { "epoch": 0.3397138610847494, "grad_norm": 1.6170680522918701, "learning_rate": 0.00015377057918302097, "loss": 1.567, "step": 9486 }, { "epoch": 0.3397496732143177, "grad_norm": 1.3598394393920898, "learning_rate": 0.00015376079931608838, "loss": 1.5678, "step": 9487 }, { "epoch": 0.339785485343886, "grad_norm": 1.3419015407562256, "learning_rate": 0.00015375101872587357, "loss": 1.4495, "step": 9488 }, { "epoch": 0.33982129747345424, "grad_norm": 1.6549489498138428, "learning_rate": 0.00015374123741250815, "loss": 1.4173, "step": 9489 }, { "epoch": 0.33985710960302257, "grad_norm": 1.4437153339385986, "learning_rate": 0.00015373145537612369, "loss": 1.5415, "step": 9490 }, { "epoch": 0.33989292173259084, "grad_norm": 3.1351869106292725, "learning_rate": 0.00015372167261685178, "loss": 1.3433, "step": 9491 }, { "epoch": 0.3399287338621591, "grad_norm": 2.001826763153076, "learning_rate": 0.0001537118891348241, "loss": 1.3978, "step": 9492 }, { "epoch": 0.3399645459917274, "grad_norm": 1.6999043226242065, "learning_rate": 0.00015370210493017222, "loss": 1.4984, "step": 9493 }, { "epoch": 0.3400003581212957, "grad_norm": 2.0367825031280518, "learning_rate": 0.00015369232000302777, "loss": 1.7663, "step": 9494 }, { "epoch": 0.34003617025086397, "grad_norm": 2.383127212524414, "learning_rate": 0.00015368253435352246, "loss": 1.5093, "step": 9495 }, { "epoch": 0.34007198238043224, "grad_norm": 1.8723777532577515, "learning_rate": 0.00015367274798178788, "loss": 1.5293, "step": 9496 }, { "epoch": 0.34010779451000056, "grad_norm": 1.9914052486419678, "learning_rate": 0.0001536629608879557, "loss": 1.4791, "step": 9497 }, { "epoch": 0.34014360663956883, "grad_norm": 1.547864317893982, "learning_rate": 0.00015365317307215759, "loss": 1.7745, "step": 9498 }, { "epoch": 0.3401794187691371, "grad_norm": 2.0833046436309814, "learning_rate": 0.00015364338453452528, "loss": 1.5529, "step": 9499 }, { "epoch": 0.34021523089870537, "grad_norm": 1.4355981349945068, "learning_rate": 0.00015363359527519036, "loss": 1.5735, "step": 9500 }, { "epoch": 0.3402510430282737, "grad_norm": 2.000267267227173, "learning_rate": 0.00015362380529428466, "loss": 1.4826, "step": 9501 }, { "epoch": 0.34028685515784196, "grad_norm": 2.343449831008911, "learning_rate": 0.0001536140145919398, "loss": 1.479, "step": 9502 }, { "epoch": 0.34032266728741023, "grad_norm": 1.5564789772033691, "learning_rate": 0.00015360422316828754, "loss": 1.6355, "step": 9503 }, { "epoch": 0.34035847941697855, "grad_norm": 1.6426461935043335, "learning_rate": 0.0001535944310234596, "loss": 1.7457, "step": 9504 }, { "epoch": 0.3403942915465468, "grad_norm": 1.9233627319335938, "learning_rate": 0.0001535846381575877, "loss": 1.7404, "step": 9505 }, { "epoch": 0.3404301036761151, "grad_norm": 1.6464073657989502, "learning_rate": 0.00015357484457080366, "loss": 1.1418, "step": 9506 }, { "epoch": 0.34046591580568336, "grad_norm": 1.4640589952468872, "learning_rate": 0.00015356505026323917, "loss": 1.5402, "step": 9507 }, { "epoch": 0.3405017279352517, "grad_norm": 1.8246670961380005, "learning_rate": 0.00015355525523502603, "loss": 1.6663, "step": 9508 }, { "epoch": 0.34053754006481995, "grad_norm": 1.9067139625549316, "learning_rate": 0.00015354545948629598, "loss": 1.6515, "step": 9509 }, { "epoch": 0.3405733521943882, "grad_norm": 1.3062249422073364, "learning_rate": 0.00015353566301718087, "loss": 1.3262, "step": 9510 }, { "epoch": 0.34060916432395655, "grad_norm": 1.529366374015808, "learning_rate": 0.00015352586582781247, "loss": 1.5815, "step": 9511 }, { "epoch": 0.3406449764535248, "grad_norm": 1.1446207761764526, "learning_rate": 0.0001535160679183226, "loss": 1.3855, "step": 9512 }, { "epoch": 0.3406807885830931, "grad_norm": 1.392532467842102, "learning_rate": 0.00015350626928884307, "loss": 1.6394, "step": 9513 }, { "epoch": 0.34071660071266135, "grad_norm": 1.3918993473052979, "learning_rate": 0.00015349646993950567, "loss": 1.3734, "step": 9514 }, { "epoch": 0.3407524128422297, "grad_norm": 1.6025148630142212, "learning_rate": 0.00015348666987044228, "loss": 1.603, "step": 9515 }, { "epoch": 0.34078822497179795, "grad_norm": 1.2502943277359009, "learning_rate": 0.00015347686908178475, "loss": 1.2778, "step": 9516 }, { "epoch": 0.3408240371013662, "grad_norm": 1.4781215190887451, "learning_rate": 0.0001534670675736649, "loss": 1.2568, "step": 9517 }, { "epoch": 0.34085984923093454, "grad_norm": 1.4211047887802124, "learning_rate": 0.00015345726534621466, "loss": 1.7071, "step": 9518 }, { "epoch": 0.3408956613605028, "grad_norm": 1.5720802545547485, "learning_rate": 0.00015344746239956587, "loss": 1.6633, "step": 9519 }, { "epoch": 0.3409314734900711, "grad_norm": 1.248879075050354, "learning_rate": 0.00015343765873385037, "loss": 1.731, "step": 9520 }, { "epoch": 0.34096728561963935, "grad_norm": 1.5327391624450684, "learning_rate": 0.00015342785434920017, "loss": 1.3957, "step": 9521 }, { "epoch": 0.3410030977492077, "grad_norm": 1.6686993837356567, "learning_rate": 0.00015341804924574707, "loss": 1.5403, "step": 9522 }, { "epoch": 0.34103890987877594, "grad_norm": 1.3809535503387451, "learning_rate": 0.00015340824342362303, "loss": 1.6858, "step": 9523 }, { "epoch": 0.3410747220083442, "grad_norm": 1.8143247365951538, "learning_rate": 0.00015339843688295997, "loss": 1.5465, "step": 9524 }, { "epoch": 0.34111053413791254, "grad_norm": 1.9842431545257568, "learning_rate": 0.00015338862962388977, "loss": 1.294, "step": 9525 }, { "epoch": 0.3411463462674808, "grad_norm": 1.9405596256256104, "learning_rate": 0.0001533788216465445, "loss": 1.6388, "step": 9526 }, { "epoch": 0.3411821583970491, "grad_norm": 1.7968621253967285, "learning_rate": 0.00015336901295105596, "loss": 1.8472, "step": 9527 }, { "epoch": 0.34121797052661734, "grad_norm": 2.323481559753418, "learning_rate": 0.00015335920353755627, "loss": 1.5243, "step": 9528 }, { "epoch": 0.34125378265618567, "grad_norm": 3.580449342727661, "learning_rate": 0.00015334939340617726, "loss": 1.6631, "step": 9529 }, { "epoch": 0.34128959478575394, "grad_norm": 1.3738518953323364, "learning_rate": 0.00015333958255705102, "loss": 1.6034, "step": 9530 }, { "epoch": 0.3413254069153222, "grad_norm": 1.4165027141571045, "learning_rate": 0.00015332977099030953, "loss": 1.8126, "step": 9531 }, { "epoch": 0.34136121904489053, "grad_norm": 1.6622012853622437, "learning_rate": 0.0001533199587060847, "loss": 1.3838, "step": 9532 }, { "epoch": 0.3413970311744588, "grad_norm": 1.4645581245422363, "learning_rate": 0.0001533101457045086, "loss": 1.648, "step": 9533 }, { "epoch": 0.34143284330402707, "grad_norm": 1.7480566501617432, "learning_rate": 0.0001533003319857133, "loss": 1.3988, "step": 9534 }, { "epoch": 0.34146865543359534, "grad_norm": 1.2829252481460571, "learning_rate": 0.00015329051754983076, "loss": 1.313, "step": 9535 }, { "epoch": 0.34150446756316366, "grad_norm": 1.990755558013916, "learning_rate": 0.00015328070239699305, "loss": 1.4727, "step": 9536 }, { "epoch": 0.34154027969273193, "grad_norm": 1.4537103176116943, "learning_rate": 0.0001532708865273322, "loss": 1.4554, "step": 9537 }, { "epoch": 0.3415760918223002, "grad_norm": 1.6201869249343872, "learning_rate": 0.0001532610699409803, "loss": 1.5224, "step": 9538 }, { "epoch": 0.3416119039518685, "grad_norm": 1.5027778148651123, "learning_rate": 0.00015325125263806943, "loss": 1.4322, "step": 9539 }, { "epoch": 0.3416477160814368, "grad_norm": 1.5254660844802856, "learning_rate": 0.0001532414346187316, "loss": 1.5606, "step": 9540 }, { "epoch": 0.34168352821100506, "grad_norm": 1.727812647819519, "learning_rate": 0.000153231615883099, "loss": 1.6136, "step": 9541 }, { "epoch": 0.34171934034057333, "grad_norm": 2.322026491165161, "learning_rate": 0.0001532217964313036, "loss": 1.5319, "step": 9542 }, { "epoch": 0.34175515247014165, "grad_norm": 2.0224554538726807, "learning_rate": 0.00015321197626347766, "loss": 1.6205, "step": 9543 }, { "epoch": 0.3417909645997099, "grad_norm": 1.3443764448165894, "learning_rate": 0.00015320215537975313, "loss": 1.6137, "step": 9544 }, { "epoch": 0.3418267767292782, "grad_norm": 1.777479648590088, "learning_rate": 0.0001531923337802623, "loss": 1.7252, "step": 9545 }, { "epoch": 0.3418625888588465, "grad_norm": 1.4354276657104492, "learning_rate": 0.0001531825114651372, "loss": 1.5497, "step": 9546 }, { "epoch": 0.3418984009884148, "grad_norm": 2.5619523525238037, "learning_rate": 0.00015317268843451003, "loss": 1.5586, "step": 9547 }, { "epoch": 0.34193421311798305, "grad_norm": 1.8442054986953735, "learning_rate": 0.0001531628646885129, "loss": 1.6815, "step": 9548 }, { "epoch": 0.3419700252475513, "grad_norm": 1.897230625152588, "learning_rate": 0.000153153040227278, "loss": 1.4317, "step": 9549 }, { "epoch": 0.34200583737711965, "grad_norm": 1.4326685667037964, "learning_rate": 0.00015314321505093751, "loss": 1.6475, "step": 9550 }, { "epoch": 0.3420416495066879, "grad_norm": 2.543628692626953, "learning_rate": 0.00015313338915962362, "loss": 1.6623, "step": 9551 }, { "epoch": 0.3420774616362562, "grad_norm": 1.4500302076339722, "learning_rate": 0.0001531235625534685, "loss": 1.3383, "step": 9552 }, { "epoch": 0.3421132737658245, "grad_norm": 1.4041082859039307, "learning_rate": 0.00015311373523260437, "loss": 1.3654, "step": 9553 }, { "epoch": 0.3421490858953928, "grad_norm": 1.5999187231063843, "learning_rate": 0.00015310390719716348, "loss": 1.5783, "step": 9554 }, { "epoch": 0.34218489802496105, "grad_norm": 1.8230500221252441, "learning_rate": 0.000153094078447278, "loss": 1.4435, "step": 9555 }, { "epoch": 0.3422207101545293, "grad_norm": 2.0518381595611572, "learning_rate": 0.00015308424898308017, "loss": 1.5841, "step": 9556 }, { "epoch": 0.34225652228409764, "grad_norm": 1.2910943031311035, "learning_rate": 0.00015307441880470227, "loss": 1.5275, "step": 9557 }, { "epoch": 0.3422923344136659, "grad_norm": 1.3538620471954346, "learning_rate": 0.00015306458791227646, "loss": 1.685, "step": 9558 }, { "epoch": 0.3423281465432342, "grad_norm": 2.853557586669922, "learning_rate": 0.00015305475630593516, "loss": 1.5067, "step": 9559 }, { "epoch": 0.3423639586728025, "grad_norm": 2.141958713531494, "learning_rate": 0.00015304492398581046, "loss": 1.4039, "step": 9560 }, { "epoch": 0.3423997708023708, "grad_norm": 2.1874539852142334, "learning_rate": 0.0001530350909520348, "loss": 1.6577, "step": 9561 }, { "epoch": 0.34243558293193904, "grad_norm": 1.843515396118164, "learning_rate": 0.00015302525720474038, "loss": 1.6085, "step": 9562 }, { "epoch": 0.3424713950615073, "grad_norm": 2.459810733795166, "learning_rate": 0.00015301542274405948, "loss": 1.5343, "step": 9563 }, { "epoch": 0.34250720719107564, "grad_norm": 2.1196999549865723, "learning_rate": 0.00015300558757012448, "loss": 1.2307, "step": 9564 }, { "epoch": 0.3425430193206439, "grad_norm": 5.689758777618408, "learning_rate": 0.00015299575168306774, "loss": 1.5805, "step": 9565 }, { "epoch": 0.3425788314502122, "grad_norm": 1.7519400119781494, "learning_rate": 0.00015298591508302142, "loss": 1.3325, "step": 9566 }, { "epoch": 0.3426146435797805, "grad_norm": 1.4828169345855713, "learning_rate": 0.000152976077770118, "loss": 1.5663, "step": 9567 }, { "epoch": 0.34265045570934877, "grad_norm": 1.916289210319519, "learning_rate": 0.00015296623974448982, "loss": 1.7129, "step": 9568 }, { "epoch": 0.34268626783891704, "grad_norm": 1.342599630355835, "learning_rate": 0.00015295640100626914, "loss": 1.3287, "step": 9569 }, { "epoch": 0.3427220799684853, "grad_norm": 1.6580992937088013, "learning_rate": 0.00015294656155558843, "loss": 1.2133, "step": 9570 }, { "epoch": 0.34275789209805363, "grad_norm": 1.2530838251113892, "learning_rate": 0.00015293672139258003, "loss": 1.3984, "step": 9571 }, { "epoch": 0.3427937042276219, "grad_norm": 1.6452621221542358, "learning_rate": 0.00015292688051737633, "loss": 1.5749, "step": 9572 }, { "epoch": 0.34282951635719017, "grad_norm": 1.9408454895019531, "learning_rate": 0.0001529170389301097, "loss": 1.5734, "step": 9573 }, { "epoch": 0.3428653284867585, "grad_norm": 1.6352133750915527, "learning_rate": 0.00015290719663091262, "loss": 1.5879, "step": 9574 }, { "epoch": 0.34290114061632676, "grad_norm": 2.4092254638671875, "learning_rate": 0.00015289735361991743, "loss": 1.4012, "step": 9575 }, { "epoch": 0.34293695274589503, "grad_norm": 1.5429397821426392, "learning_rate": 0.00015288750989725657, "loss": 1.2422, "step": 9576 }, { "epoch": 0.3429727648754633, "grad_norm": 1.6832060813903809, "learning_rate": 0.00015287766546306247, "loss": 1.6365, "step": 9577 }, { "epoch": 0.3430085770050316, "grad_norm": 1.3857985734939575, "learning_rate": 0.00015286782031746763, "loss": 1.4832, "step": 9578 }, { "epoch": 0.3430443891345999, "grad_norm": 2.488194227218628, "learning_rate": 0.00015285797446060442, "loss": 1.8965, "step": 9579 }, { "epoch": 0.34308020126416816, "grad_norm": 1.5207929611206055, "learning_rate": 0.00015284812789260536, "loss": 1.5894, "step": 9580 }, { "epoch": 0.3431160133937365, "grad_norm": 1.9598724842071533, "learning_rate": 0.00015283828061360291, "loss": 1.423, "step": 9581 }, { "epoch": 0.34315182552330475, "grad_norm": 2.2789857387542725, "learning_rate": 0.00015282843262372955, "loss": 1.7208, "step": 9582 }, { "epoch": 0.343187637652873, "grad_norm": 1.4618828296661377, "learning_rate": 0.0001528185839231178, "loss": 1.7469, "step": 9583 }, { "epoch": 0.3432234497824413, "grad_norm": 1.8113200664520264, "learning_rate": 0.00015280873451190008, "loss": 1.5084, "step": 9584 }, { "epoch": 0.3432592619120096, "grad_norm": 2.0830557346343994, "learning_rate": 0.000152798884390209, "loss": 1.5144, "step": 9585 }, { "epoch": 0.3432950740415779, "grad_norm": 2.5192768573760986, "learning_rate": 0.000152789033558177, "loss": 1.6695, "step": 9586 }, { "epoch": 0.34333088617114615, "grad_norm": 2.1534531116485596, "learning_rate": 0.0001527791820159367, "loss": 1.5468, "step": 9587 }, { "epoch": 0.3433666983007145, "grad_norm": 1.605986475944519, "learning_rate": 0.00015276932976362052, "loss": 1.2892, "step": 9588 }, { "epoch": 0.34340251043028275, "grad_norm": 1.4528756141662598, "learning_rate": 0.00015275947680136112, "loss": 1.5642, "step": 9589 }, { "epoch": 0.343438322559851, "grad_norm": 1.5881379842758179, "learning_rate": 0.000152749623129291, "loss": 1.5147, "step": 9590 }, { "epoch": 0.3434741346894193, "grad_norm": 1.604173183441162, "learning_rate": 0.00015273976874754274, "loss": 1.5219, "step": 9591 }, { "epoch": 0.3435099468189876, "grad_norm": 1.7604161500930786, "learning_rate": 0.00015272991365624896, "loss": 1.727, "step": 9592 }, { "epoch": 0.3435457589485559, "grad_norm": 1.5851354598999023, "learning_rate": 0.00015272005785554215, "loss": 1.7405, "step": 9593 }, { "epoch": 0.34358157107812415, "grad_norm": 1.7585153579711914, "learning_rate": 0.000152710201345555, "loss": 1.5413, "step": 9594 }, { "epoch": 0.3436173832076925, "grad_norm": 2.0630173683166504, "learning_rate": 0.00015270034412642007, "loss": 1.7264, "step": 9595 }, { "epoch": 0.34365319533726074, "grad_norm": 2.172152042388916, "learning_rate": 0.00015269048619827, "loss": 1.7205, "step": 9596 }, { "epoch": 0.343689007466829, "grad_norm": 2.5975513458251953, "learning_rate": 0.0001526806275612374, "loss": 1.9148, "step": 9597 }, { "epoch": 0.3437248195963973, "grad_norm": 1.559423804283142, "learning_rate": 0.00015267076821545489, "loss": 1.6451, "step": 9598 }, { "epoch": 0.3437606317259656, "grad_norm": 2.140960693359375, "learning_rate": 0.00015266090816105514, "loss": 1.9031, "step": 9599 }, { "epoch": 0.3437964438555339, "grad_norm": 1.62228262424469, "learning_rate": 0.00015265104739817082, "loss": 1.8635, "step": 9600 }, { "epoch": 0.34383225598510214, "grad_norm": 1.7803810834884644, "learning_rate": 0.00015264118592693457, "loss": 1.2803, "step": 9601 }, { "epoch": 0.34386806811467047, "grad_norm": 1.7218365669250488, "learning_rate": 0.00015263132374747907, "loss": 1.3518, "step": 9602 }, { "epoch": 0.34390388024423874, "grad_norm": 1.9356237649917603, "learning_rate": 0.00015262146085993697, "loss": 1.7451, "step": 9603 }, { "epoch": 0.343939692373807, "grad_norm": 1.4498659372329712, "learning_rate": 0.00015261159726444098, "loss": 1.7193, "step": 9604 }, { "epoch": 0.3439755045033753, "grad_norm": 1.850898027420044, "learning_rate": 0.00015260173296112385, "loss": 1.4767, "step": 9605 }, { "epoch": 0.3440113166329436, "grad_norm": 1.6529873609542847, "learning_rate": 0.00015259186795011823, "loss": 1.9236, "step": 9606 }, { "epoch": 0.34404712876251187, "grad_norm": 2.0878450870513916, "learning_rate": 0.0001525820022315569, "loss": 1.684, "step": 9607 }, { "epoch": 0.34408294089208014, "grad_norm": 1.974679708480835, "learning_rate": 0.0001525721358055725, "loss": 1.4546, "step": 9608 }, { "epoch": 0.3441187530216484, "grad_norm": 1.576131820678711, "learning_rate": 0.0001525622686722979, "loss": 1.5243, "step": 9609 }, { "epoch": 0.34415456515121673, "grad_norm": 2.246279716491699, "learning_rate": 0.00015255240083186572, "loss": 1.2719, "step": 9610 }, { "epoch": 0.344190377280785, "grad_norm": 1.4744218587875366, "learning_rate": 0.00015254253228440877, "loss": 1.569, "step": 9611 }, { "epoch": 0.34422618941035327, "grad_norm": 2.0905468463897705, "learning_rate": 0.00015253266303005987, "loss": 1.3189, "step": 9612 }, { "epoch": 0.3442620015399216, "grad_norm": 1.9109582901000977, "learning_rate": 0.00015252279306895172, "loss": 1.3807, "step": 9613 }, { "epoch": 0.34429781366948986, "grad_norm": 1.441930890083313, "learning_rate": 0.00015251292240121714, "loss": 1.3186, "step": 9614 }, { "epoch": 0.34433362579905813, "grad_norm": 1.6397720575332642, "learning_rate": 0.0001525030510269889, "loss": 1.477, "step": 9615 }, { "epoch": 0.3443694379286264, "grad_norm": 1.3502668142318726, "learning_rate": 0.00015249317894639987, "loss": 1.5195, "step": 9616 }, { "epoch": 0.3444052500581947, "grad_norm": 1.8765751123428345, "learning_rate": 0.00015248330615958282, "loss": 1.4637, "step": 9617 }, { "epoch": 0.344441062187763, "grad_norm": 1.7291653156280518, "learning_rate": 0.00015247343266667061, "loss": 1.5855, "step": 9618 }, { "epoch": 0.34447687431733126, "grad_norm": 1.695115566253662, "learning_rate": 0.00015246355846779602, "loss": 1.8087, "step": 9619 }, { "epoch": 0.3445126864468996, "grad_norm": 1.4280116558074951, "learning_rate": 0.00015245368356309194, "loss": 1.3901, "step": 9620 }, { "epoch": 0.34454849857646785, "grad_norm": 1.5115997791290283, "learning_rate": 0.00015244380795269118, "loss": 1.7692, "step": 9621 }, { "epoch": 0.3445843107060361, "grad_norm": 1.878031849861145, "learning_rate": 0.00015243393163672664, "loss": 1.469, "step": 9622 }, { "epoch": 0.3446201228356044, "grad_norm": 1.791983723640442, "learning_rate": 0.00015242405461533118, "loss": 1.6334, "step": 9623 }, { "epoch": 0.3446559349651727, "grad_norm": 1.6626172065734863, "learning_rate": 0.0001524141768886377, "loss": 1.527, "step": 9624 }, { "epoch": 0.344691747094741, "grad_norm": 2.501840829849243, "learning_rate": 0.0001524042984567791, "loss": 1.6678, "step": 9625 }, { "epoch": 0.34472755922430925, "grad_norm": 1.8956242799758911, "learning_rate": 0.0001523944193198882, "loss": 1.4583, "step": 9626 }, { "epoch": 0.3447633713538776, "grad_norm": 2.0565409660339355, "learning_rate": 0.00015238453947809805, "loss": 1.697, "step": 9627 }, { "epoch": 0.34479918348344585, "grad_norm": 2.0155301094055176, "learning_rate": 0.00015237465893154143, "loss": 1.71, "step": 9628 }, { "epoch": 0.3448349956130141, "grad_norm": 2.654571056365967, "learning_rate": 0.00015236477768035137, "loss": 1.5797, "step": 9629 }, { "epoch": 0.3448708077425824, "grad_norm": 1.6048696041107178, "learning_rate": 0.00015235489572466078, "loss": 1.7246, "step": 9630 }, { "epoch": 0.3449066198721507, "grad_norm": 2.183603525161743, "learning_rate": 0.00015234501306460256, "loss": 1.3855, "step": 9631 }, { "epoch": 0.344942432001719, "grad_norm": 1.7944676876068115, "learning_rate": 0.0001523351297003097, "loss": 1.585, "step": 9632 }, { "epoch": 0.34497824413128725, "grad_norm": 1.8598835468292236, "learning_rate": 0.00015232524563191523, "loss": 1.3623, "step": 9633 }, { "epoch": 0.3450140562608556, "grad_norm": 2.6025619506835938, "learning_rate": 0.00015231536085955205, "loss": 1.6624, "step": 9634 }, { "epoch": 0.34504986839042384, "grad_norm": 1.3773020505905151, "learning_rate": 0.00015230547538335317, "loss": 1.68, "step": 9635 }, { "epoch": 0.3450856805199921, "grad_norm": 2.4558515548706055, "learning_rate": 0.00015229558920345162, "loss": 1.5141, "step": 9636 }, { "epoch": 0.3451214926495604, "grad_norm": 1.6440409421920776, "learning_rate": 0.00015228570231998033, "loss": 1.5429, "step": 9637 }, { "epoch": 0.3451573047791287, "grad_norm": 1.8336268663406372, "learning_rate": 0.00015227581473307238, "loss": 1.559, "step": 9638 }, { "epoch": 0.345193116908697, "grad_norm": 1.4943885803222656, "learning_rate": 0.00015226592644286075, "loss": 1.3127, "step": 9639 }, { "epoch": 0.34522892903826524, "grad_norm": 1.8755306005477905, "learning_rate": 0.00015225603744947852, "loss": 1.5688, "step": 9640 }, { "epoch": 0.34526474116783357, "grad_norm": 1.2759945392608643, "learning_rate": 0.0001522461477530587, "loss": 1.5486, "step": 9641 }, { "epoch": 0.34530055329740184, "grad_norm": 1.2741237878799438, "learning_rate": 0.00015223625735373436, "loss": 1.5329, "step": 9642 }, { "epoch": 0.3453363654269701, "grad_norm": 1.827254056930542, "learning_rate": 0.00015222636625163854, "loss": 1.7189, "step": 9643 }, { "epoch": 0.3453721775565384, "grad_norm": 1.470656394958496, "learning_rate": 0.00015221647444690437, "loss": 1.3983, "step": 9644 }, { "epoch": 0.3454079896861067, "grad_norm": 1.6272372007369995, "learning_rate": 0.00015220658193966489, "loss": 1.4919, "step": 9645 }, { "epoch": 0.34544380181567497, "grad_norm": 1.6300824880599976, "learning_rate": 0.00015219668873005314, "loss": 1.567, "step": 9646 }, { "epoch": 0.34547961394524324, "grad_norm": 1.6557114124298096, "learning_rate": 0.0001521867948182023, "loss": 1.5153, "step": 9647 }, { "epoch": 0.34551542607481156, "grad_norm": 1.9078369140625, "learning_rate": 0.00015217690020424547, "loss": 1.8161, "step": 9648 }, { "epoch": 0.34555123820437983, "grad_norm": 1.5560898780822754, "learning_rate": 0.00015216700488831573, "loss": 1.6721, "step": 9649 }, { "epoch": 0.3455870503339481, "grad_norm": 1.4022656679153442, "learning_rate": 0.00015215710887054622, "loss": 1.5221, "step": 9650 }, { "epoch": 0.34562286246351637, "grad_norm": 1.6788058280944824, "learning_rate": 0.00015214721215107011, "loss": 1.459, "step": 9651 }, { "epoch": 0.3456586745930847, "grad_norm": 1.7111127376556396, "learning_rate": 0.0001521373147300205, "loss": 1.3335, "step": 9652 }, { "epoch": 0.34569448672265296, "grad_norm": 1.7129048109054565, "learning_rate": 0.0001521274166075306, "loss": 1.5987, "step": 9653 }, { "epoch": 0.34573029885222123, "grad_norm": 1.3389177322387695, "learning_rate": 0.00015211751778373357, "loss": 1.424, "step": 9654 }, { "epoch": 0.34576611098178955, "grad_norm": 1.848729133605957, "learning_rate": 0.0001521076182587625, "loss": 1.7741, "step": 9655 }, { "epoch": 0.3458019231113578, "grad_norm": 1.724698781967163, "learning_rate": 0.0001520977180327507, "loss": 1.4231, "step": 9656 }, { "epoch": 0.3458377352409261, "grad_norm": 2.336831569671631, "learning_rate": 0.00015208781710583126, "loss": 1.2322, "step": 9657 }, { "epoch": 0.34587354737049436, "grad_norm": 2.277905225753784, "learning_rate": 0.00015207791547813744, "loss": 1.6277, "step": 9658 }, { "epoch": 0.3459093595000627, "grad_norm": 1.8792333602905273, "learning_rate": 0.00015206801314980245, "loss": 1.7224, "step": 9659 }, { "epoch": 0.34594517162963095, "grad_norm": 1.5436029434204102, "learning_rate": 0.00015205811012095952, "loss": 1.7911, "step": 9660 }, { "epoch": 0.3459809837591992, "grad_norm": 1.6252689361572266, "learning_rate": 0.00015204820639174184, "loss": 1.5968, "step": 9661 }, { "epoch": 0.34601679588876755, "grad_norm": 1.3083243370056152, "learning_rate": 0.00015203830196228272, "loss": 1.6488, "step": 9662 }, { "epoch": 0.3460526080183358, "grad_norm": 2.9727373123168945, "learning_rate": 0.00015202839683271536, "loss": 1.2147, "step": 9663 }, { "epoch": 0.3460884201479041, "grad_norm": 1.409303069114685, "learning_rate": 0.000152018491003173, "loss": 1.3595, "step": 9664 }, { "epoch": 0.34612423227747235, "grad_norm": 1.4419803619384766, "learning_rate": 0.00015200858447378897, "loss": 1.5398, "step": 9665 }, { "epoch": 0.3461600444070407, "grad_norm": 1.60395348072052, "learning_rate": 0.0001519986772446965, "loss": 1.44, "step": 9666 }, { "epoch": 0.34619585653660895, "grad_norm": 1.5209178924560547, "learning_rate": 0.00015198876931602894, "loss": 1.6457, "step": 9667 }, { "epoch": 0.3462316686661772, "grad_norm": 1.4209908246994019, "learning_rate": 0.00015197886068791952, "loss": 1.5196, "step": 9668 }, { "epoch": 0.34626748079574554, "grad_norm": 1.624914288520813, "learning_rate": 0.00015196895136050157, "loss": 1.3967, "step": 9669 }, { "epoch": 0.3463032929253138, "grad_norm": 1.4199450016021729, "learning_rate": 0.00015195904133390842, "loss": 1.2958, "step": 9670 }, { "epoch": 0.3463391050548821, "grad_norm": 1.8208658695220947, "learning_rate": 0.00015194913060827343, "loss": 1.4508, "step": 9671 }, { "epoch": 0.34637491718445035, "grad_norm": 1.6885290145874023, "learning_rate": 0.00015193921918372984, "loss": 1.4547, "step": 9672 }, { "epoch": 0.3464107293140187, "grad_norm": 1.8708484172821045, "learning_rate": 0.00015192930706041112, "loss": 1.5467, "step": 9673 }, { "epoch": 0.34644654144358694, "grad_norm": 1.600899338722229, "learning_rate": 0.00015191939423845049, "loss": 1.4948, "step": 9674 }, { "epoch": 0.3464823535731552, "grad_norm": 1.680953025817871, "learning_rate": 0.0001519094807179814, "loss": 1.5515, "step": 9675 }, { "epoch": 0.34651816570272354, "grad_norm": 2.119535207748413, "learning_rate": 0.00015189956649913722, "loss": 1.5226, "step": 9676 }, { "epoch": 0.3465539778322918, "grad_norm": 1.7617014646530151, "learning_rate": 0.0001518896515820513, "loss": 1.8543, "step": 9677 }, { "epoch": 0.3465897899618601, "grad_norm": 1.7265313863754272, "learning_rate": 0.00015187973596685706, "loss": 1.5269, "step": 9678 }, { "epoch": 0.34662560209142834, "grad_norm": 1.4291623830795288, "learning_rate": 0.0001518698196536879, "loss": 1.4145, "step": 9679 }, { "epoch": 0.34666141422099667, "grad_norm": 1.4286699295043945, "learning_rate": 0.00015185990264267725, "loss": 1.4807, "step": 9680 }, { "epoch": 0.34669722635056494, "grad_norm": 1.8250465393066406, "learning_rate": 0.00015184998493395846, "loss": 1.841, "step": 9681 }, { "epoch": 0.3467330384801332, "grad_norm": 1.9351611137390137, "learning_rate": 0.00015184006652766503, "loss": 1.4977, "step": 9682 }, { "epoch": 0.34676885060970153, "grad_norm": 2.0955495834350586, "learning_rate": 0.00015183014742393036, "loss": 1.6411, "step": 9683 }, { "epoch": 0.3468046627392698, "grad_norm": 2.191845178604126, "learning_rate": 0.0001518202276228879, "loss": 1.3476, "step": 9684 }, { "epoch": 0.34684047486883807, "grad_norm": 1.5463204383850098, "learning_rate": 0.00015181030712467113, "loss": 1.4183, "step": 9685 }, { "epoch": 0.34687628699840634, "grad_norm": 1.43691086769104, "learning_rate": 0.0001518003859294135, "loss": 1.643, "step": 9686 }, { "epoch": 0.34691209912797466, "grad_norm": 1.4780082702636719, "learning_rate": 0.00015179046403724852, "loss": 1.5861, "step": 9687 }, { "epoch": 0.34694791125754293, "grad_norm": 1.961837887763977, "learning_rate": 0.00015178054144830965, "loss": 1.7576, "step": 9688 }, { "epoch": 0.3469837233871112, "grad_norm": 1.8840779066085815, "learning_rate": 0.0001517706181627304, "loss": 1.3534, "step": 9689 }, { "epoch": 0.3470195355166795, "grad_norm": 2.285611629486084, "learning_rate": 0.0001517606941806442, "loss": 1.8627, "step": 9690 }, { "epoch": 0.3470553476462478, "grad_norm": 1.699450135231018, "learning_rate": 0.00015175076950218468, "loss": 1.3957, "step": 9691 }, { "epoch": 0.34709115977581606, "grad_norm": 1.8687032461166382, "learning_rate": 0.00015174084412748529, "loss": 1.4801, "step": 9692 }, { "epoch": 0.34712697190538433, "grad_norm": 1.4657593965530396, "learning_rate": 0.00015173091805667957, "loss": 1.5096, "step": 9693 }, { "epoch": 0.34716278403495265, "grad_norm": 1.5671417713165283, "learning_rate": 0.0001517209912899011, "loss": 1.6317, "step": 9694 }, { "epoch": 0.3471985961645209, "grad_norm": 1.8080321550369263, "learning_rate": 0.00015171106382728342, "loss": 1.2073, "step": 9695 }, { "epoch": 0.3472344082940892, "grad_norm": 1.7904155254364014, "learning_rate": 0.00015170113566896005, "loss": 1.3794, "step": 9696 }, { "epoch": 0.3472702204236575, "grad_norm": 1.9424508810043335, "learning_rate": 0.0001516912068150646, "loss": 1.6862, "step": 9697 }, { "epoch": 0.3473060325532258, "grad_norm": 1.6134363412857056, "learning_rate": 0.00015168127726573064, "loss": 1.5705, "step": 9698 }, { "epoch": 0.34734184468279405, "grad_norm": 2.201932430267334, "learning_rate": 0.00015167134702109177, "loss": 1.5044, "step": 9699 }, { "epoch": 0.3473776568123623, "grad_norm": 1.6935111284255981, "learning_rate": 0.00015166141608128158, "loss": 1.7495, "step": 9700 }, { "epoch": 0.34741346894193065, "grad_norm": 1.9058157205581665, "learning_rate": 0.0001516514844464336, "loss": 1.7792, "step": 9701 }, { "epoch": 0.3474492810714989, "grad_norm": 1.739087462425232, "learning_rate": 0.00015164155211668163, "loss": 1.1891, "step": 9702 }, { "epoch": 0.3474850932010672, "grad_norm": 2.0421783924102783, "learning_rate": 0.00015163161909215913, "loss": 1.4558, "step": 9703 }, { "epoch": 0.3475209053306355, "grad_norm": 1.9003093242645264, "learning_rate": 0.00015162168537299979, "loss": 1.7258, "step": 9704 }, { "epoch": 0.3475567174602038, "grad_norm": 1.799619197845459, "learning_rate": 0.00015161175095933729, "loss": 1.4118, "step": 9705 }, { "epoch": 0.34759252958977205, "grad_norm": 2.3882851600646973, "learning_rate": 0.00015160181585130523, "loss": 1.4103, "step": 9706 }, { "epoch": 0.3476283417193403, "grad_norm": 1.882601022720337, "learning_rate": 0.00015159188004903733, "loss": 1.6516, "step": 9707 }, { "epoch": 0.34766415384890864, "grad_norm": 2.743563413619995, "learning_rate": 0.0001515819435526672, "loss": 1.6921, "step": 9708 }, { "epoch": 0.3476999659784769, "grad_norm": 1.8046648502349854, "learning_rate": 0.00015157200636232857, "loss": 1.6462, "step": 9709 }, { "epoch": 0.3477357781080452, "grad_norm": 1.5821819305419922, "learning_rate": 0.0001515620684781551, "loss": 1.6373, "step": 9710 }, { "epoch": 0.3477715902376135, "grad_norm": 1.2368242740631104, "learning_rate": 0.00015155212990028053, "loss": 1.4887, "step": 9711 }, { "epoch": 0.3478074023671818, "grad_norm": 1.3953620195388794, "learning_rate": 0.00015154219062883854, "loss": 1.348, "step": 9712 }, { "epoch": 0.34784321449675004, "grad_norm": 1.7120468616485596, "learning_rate": 0.00015153225066396288, "loss": 1.4454, "step": 9713 }, { "epoch": 0.3478790266263183, "grad_norm": 1.358496904373169, "learning_rate": 0.00015152231000578723, "loss": 1.3551, "step": 9714 }, { "epoch": 0.34791483875588664, "grad_norm": 1.2302814722061157, "learning_rate": 0.00015151236865444537, "loss": 1.5733, "step": 9715 }, { "epoch": 0.3479506508854549, "grad_norm": 2.139761447906494, "learning_rate": 0.00015150242661007103, "loss": 1.3676, "step": 9716 }, { "epoch": 0.3479864630150232, "grad_norm": 2.1644511222839355, "learning_rate": 0.000151492483872798, "loss": 1.493, "step": 9717 }, { "epoch": 0.3480222751445915, "grad_norm": 2.0593531131744385, "learning_rate": 0.00015148254044276, "loss": 1.788, "step": 9718 }, { "epoch": 0.34805808727415977, "grad_norm": 1.453326940536499, "learning_rate": 0.00015147259632009082, "loss": 1.2916, "step": 9719 }, { "epoch": 0.34809389940372804, "grad_norm": 1.3180242776870728, "learning_rate": 0.00015146265150492428, "loss": 1.4568, "step": 9720 }, { "epoch": 0.3481297115332963, "grad_norm": 1.6377161741256714, "learning_rate": 0.0001514527059973941, "loss": 1.5079, "step": 9721 }, { "epoch": 0.34816552366286463, "grad_norm": 1.53775155544281, "learning_rate": 0.00015144275979763416, "loss": 1.7657, "step": 9722 }, { "epoch": 0.3482013357924329, "grad_norm": 1.615124225616455, "learning_rate": 0.0001514328129057782, "loss": 1.5548, "step": 9723 }, { "epoch": 0.34823714792200117, "grad_norm": 2.9852917194366455, "learning_rate": 0.00015142286532196018, "loss": 1.5382, "step": 9724 }, { "epoch": 0.3482729600515695, "grad_norm": 1.7540369033813477, "learning_rate": 0.00015141291704631374, "loss": 1.539, "step": 9725 }, { "epoch": 0.34830877218113776, "grad_norm": 1.644862174987793, "learning_rate": 0.00015140296807897289, "loss": 1.495, "step": 9726 }, { "epoch": 0.34834458431070603, "grad_norm": 1.7135051488876343, "learning_rate": 0.00015139301842007137, "loss": 1.623, "step": 9727 }, { "epoch": 0.3483803964402743, "grad_norm": 1.8585213422775269, "learning_rate": 0.0001513830680697431, "loss": 1.4913, "step": 9728 }, { "epoch": 0.3484162085698426, "grad_norm": 1.7377535104751587, "learning_rate": 0.0001513731170281219, "loss": 1.3441, "step": 9729 }, { "epoch": 0.3484520206994109, "grad_norm": 1.4166041612625122, "learning_rate": 0.00015136316529534168, "loss": 1.6572, "step": 9730 }, { "epoch": 0.34848783282897916, "grad_norm": 2.0903475284576416, "learning_rate": 0.00015135321287153636, "loss": 1.2165, "step": 9731 }, { "epoch": 0.3485236449585475, "grad_norm": 1.809444785118103, "learning_rate": 0.00015134325975683975, "loss": 1.5006, "step": 9732 }, { "epoch": 0.34855945708811575, "grad_norm": 2.5409252643585205, "learning_rate": 0.00015133330595138586, "loss": 1.9359, "step": 9733 }, { "epoch": 0.348595269217684, "grad_norm": 1.4193686246871948, "learning_rate": 0.00015132335145530854, "loss": 1.4533, "step": 9734 }, { "epoch": 0.3486310813472523, "grad_norm": 1.9613677263259888, "learning_rate": 0.00015131339626874173, "loss": 1.3445, "step": 9735 }, { "epoch": 0.3486668934768206, "grad_norm": 1.5214570760726929, "learning_rate": 0.00015130344039181935, "loss": 1.2932, "step": 9736 }, { "epoch": 0.3487027056063889, "grad_norm": 1.8563958406448364, "learning_rate": 0.00015129348382467535, "loss": 1.876, "step": 9737 }, { "epoch": 0.34873851773595715, "grad_norm": 1.4242786169052124, "learning_rate": 0.00015128352656744372, "loss": 1.6204, "step": 9738 }, { "epoch": 0.3487743298655255, "grad_norm": 2.609499931335449, "learning_rate": 0.00015127356862025835, "loss": 1.422, "step": 9739 }, { "epoch": 0.34881014199509375, "grad_norm": 1.4955037832260132, "learning_rate": 0.00015126360998325326, "loss": 1.4913, "step": 9740 }, { "epoch": 0.348845954124662, "grad_norm": 1.7982819080352783, "learning_rate": 0.00015125365065656244, "loss": 1.2715, "step": 9741 }, { "epoch": 0.3488817662542303, "grad_norm": 2.949068307876587, "learning_rate": 0.00015124369064031988, "loss": 1.8439, "step": 9742 }, { "epoch": 0.3489175783837986, "grad_norm": 1.6375192403793335, "learning_rate": 0.00015123372993465953, "loss": 1.7551, "step": 9743 }, { "epoch": 0.3489533905133669, "grad_norm": 1.3728387355804443, "learning_rate": 0.00015122376853971545, "loss": 1.5968, "step": 9744 }, { "epoch": 0.34898920264293515, "grad_norm": 2.1111888885498047, "learning_rate": 0.00015121380645562163, "loss": 1.5258, "step": 9745 }, { "epoch": 0.3490250147725035, "grad_norm": 1.3476275205612183, "learning_rate": 0.0001512038436825121, "loss": 1.368, "step": 9746 }, { "epoch": 0.34906082690207174, "grad_norm": 1.5277924537658691, "learning_rate": 0.0001511938802205209, "loss": 1.5331, "step": 9747 }, { "epoch": 0.34909663903164, "grad_norm": 1.4983168840408325, "learning_rate": 0.0001511839160697821, "loss": 1.3779, "step": 9748 }, { "epoch": 0.3491324511612083, "grad_norm": 1.4858800172805786, "learning_rate": 0.00015117395123042968, "loss": 1.2519, "step": 9749 }, { "epoch": 0.3491682632907766, "grad_norm": 1.6948373317718506, "learning_rate": 0.0001511639857025978, "loss": 1.7947, "step": 9750 }, { "epoch": 0.3492040754203449, "grad_norm": 1.306716799736023, "learning_rate": 0.0001511540194864205, "loss": 1.4975, "step": 9751 }, { "epoch": 0.34923988754991314, "grad_norm": 1.2586356401443481, "learning_rate": 0.0001511440525820318, "loss": 1.5344, "step": 9752 }, { "epoch": 0.34927569967948147, "grad_norm": 1.4947229623794556, "learning_rate": 0.0001511340849895659, "loss": 1.4292, "step": 9753 }, { "epoch": 0.34931151180904974, "grad_norm": 1.699182391166687, "learning_rate": 0.0001511241167091568, "loss": 1.6858, "step": 9754 }, { "epoch": 0.349347323938618, "grad_norm": 1.7800447940826416, "learning_rate": 0.0001511141477409387, "loss": 1.441, "step": 9755 }, { "epoch": 0.3493831360681863, "grad_norm": 1.2749764919281006, "learning_rate": 0.00015110417808504562, "loss": 1.682, "step": 9756 }, { "epoch": 0.3494189481977546, "grad_norm": 1.828865885734558, "learning_rate": 0.00015109420774161178, "loss": 1.5552, "step": 9757 }, { "epoch": 0.34945476032732287, "grad_norm": 1.4188942909240723, "learning_rate": 0.00015108423671077124, "loss": 1.6542, "step": 9758 }, { "epoch": 0.34949057245689114, "grad_norm": 1.8428118228912354, "learning_rate": 0.00015107426499265825, "loss": 1.5784, "step": 9759 }, { "epoch": 0.34952638458645946, "grad_norm": 1.4908447265625, "learning_rate": 0.00015106429258740687, "loss": 1.6085, "step": 9760 }, { "epoch": 0.34956219671602773, "grad_norm": 1.5967750549316406, "learning_rate": 0.00015105431949515133, "loss": 1.1694, "step": 9761 }, { "epoch": 0.349598008845596, "grad_norm": 1.6794275045394897, "learning_rate": 0.00015104434571602577, "loss": 1.4624, "step": 9762 }, { "epoch": 0.34963382097516427, "grad_norm": 1.981397271156311, "learning_rate": 0.00015103437125016432, "loss": 1.3633, "step": 9763 }, { "epoch": 0.3496696331047326, "grad_norm": 2.4205143451690674, "learning_rate": 0.0001510243960977013, "loss": 1.7959, "step": 9764 }, { "epoch": 0.34970544523430086, "grad_norm": 2.2734334468841553, "learning_rate": 0.00015101442025877083, "loss": 1.3619, "step": 9765 }, { "epoch": 0.34974125736386913, "grad_norm": 1.8504884243011475, "learning_rate": 0.00015100444373350715, "loss": 1.6551, "step": 9766 }, { "epoch": 0.34977706949343745, "grad_norm": 1.5831323862075806, "learning_rate": 0.00015099446652204446, "loss": 1.5193, "step": 9767 }, { "epoch": 0.3498128816230057, "grad_norm": 1.8388887643814087, "learning_rate": 0.00015098448862451702, "loss": 1.4238, "step": 9768 }, { "epoch": 0.349848693752574, "grad_norm": 1.5945453643798828, "learning_rate": 0.00015097451004105904, "loss": 1.4274, "step": 9769 }, { "epoch": 0.34988450588214226, "grad_norm": 1.5054142475128174, "learning_rate": 0.00015096453077180475, "loss": 1.506, "step": 9770 }, { "epoch": 0.3499203180117106, "grad_norm": 1.8652490377426147, "learning_rate": 0.00015095455081688846, "loss": 1.8354, "step": 9771 }, { "epoch": 0.34995613014127885, "grad_norm": 1.8523324728012085, "learning_rate": 0.0001509445701764444, "loss": 1.6814, "step": 9772 }, { "epoch": 0.3499919422708471, "grad_norm": 1.3865050077438354, "learning_rate": 0.00015093458885060687, "loss": 1.2962, "step": 9773 }, { "epoch": 0.35002775440041545, "grad_norm": 1.6977640390396118, "learning_rate": 0.00015092460683951015, "loss": 1.4121, "step": 9774 }, { "epoch": 0.3500635665299837, "grad_norm": 2.1910016536712646, "learning_rate": 0.00015091462414328855, "loss": 1.9099, "step": 9775 }, { "epoch": 0.350099378659552, "grad_norm": 2.3362877368927, "learning_rate": 0.00015090464076207634, "loss": 1.7061, "step": 9776 }, { "epoch": 0.35013519078912025, "grad_norm": 1.5573481321334839, "learning_rate": 0.0001508946566960079, "loss": 1.6889, "step": 9777 }, { "epoch": 0.3501710029186886, "grad_norm": 1.7748955488204956, "learning_rate": 0.0001508846719452174, "loss": 1.3411, "step": 9778 }, { "epoch": 0.35020681504825685, "grad_norm": 1.7088675498962402, "learning_rate": 0.00015087468650983935, "loss": 1.5765, "step": 9779 }, { "epoch": 0.3502426271778251, "grad_norm": 2.062631368637085, "learning_rate": 0.00015086470039000802, "loss": 1.5179, "step": 9780 }, { "epoch": 0.35027843930739344, "grad_norm": 1.7780126333236694, "learning_rate": 0.00015085471358585774, "loss": 1.5016, "step": 9781 }, { "epoch": 0.3503142514369617, "grad_norm": 1.4579542875289917, "learning_rate": 0.00015084472609752284, "loss": 1.5187, "step": 9782 }, { "epoch": 0.35035006356653, "grad_norm": 1.4404877424240112, "learning_rate": 0.0001508347379251378, "loss": 1.5978, "step": 9783 }, { "epoch": 0.35038587569609825, "grad_norm": 1.950384259223938, "learning_rate": 0.0001508247490688369, "loss": 1.4719, "step": 9784 }, { "epoch": 0.3504216878256666, "grad_norm": 1.659462571144104, "learning_rate": 0.0001508147595287546, "loss": 1.6149, "step": 9785 }, { "epoch": 0.35045749995523484, "grad_norm": 1.4204747676849365, "learning_rate": 0.00015080476930502522, "loss": 1.6546, "step": 9786 }, { "epoch": 0.3504933120848031, "grad_norm": 1.5785166025161743, "learning_rate": 0.0001507947783977832, "loss": 1.5731, "step": 9787 }, { "epoch": 0.35052912421437143, "grad_norm": 1.9538383483886719, "learning_rate": 0.00015078478680716299, "loss": 1.473, "step": 9788 }, { "epoch": 0.3505649363439397, "grad_norm": 1.3947443962097168, "learning_rate": 0.00015077479453329894, "loss": 1.327, "step": 9789 }, { "epoch": 0.350600748473508, "grad_norm": 2.454119920730591, "learning_rate": 0.0001507648015763256, "loss": 1.497, "step": 9790 }, { "epoch": 0.35063656060307624, "grad_norm": 2.188707113265991, "learning_rate": 0.00015075480793637724, "loss": 1.8418, "step": 9791 }, { "epoch": 0.35067237273264457, "grad_norm": 1.5564440488815308, "learning_rate": 0.00015074481361358844, "loss": 1.6697, "step": 9792 }, { "epoch": 0.35070818486221284, "grad_norm": 2.128994941711426, "learning_rate": 0.00015073481860809363, "loss": 1.3797, "step": 9793 }, { "epoch": 0.3507439969917811, "grad_norm": 1.9400798082351685, "learning_rate": 0.00015072482292002732, "loss": 1.5212, "step": 9794 }, { "epoch": 0.35077980912134943, "grad_norm": 1.5561938285827637, "learning_rate": 0.0001507148265495239, "loss": 1.4001, "step": 9795 }, { "epoch": 0.3508156212509177, "grad_norm": 1.46834135055542, "learning_rate": 0.00015070482949671794, "loss": 1.3285, "step": 9796 }, { "epoch": 0.35085143338048597, "grad_norm": 1.8120313882827759, "learning_rate": 0.0001506948317617439, "loss": 1.809, "step": 9797 }, { "epoch": 0.35088724551005424, "grad_norm": 1.6378916501998901, "learning_rate": 0.00015068483334473623, "loss": 1.6557, "step": 9798 }, { "epoch": 0.35092305763962256, "grad_norm": 1.4764701128005981, "learning_rate": 0.00015067483424582956, "loss": 1.5015, "step": 9799 }, { "epoch": 0.35095886976919083, "grad_norm": 1.7734726667404175, "learning_rate": 0.00015066483446515836, "loss": 1.2686, "step": 9800 }, { "epoch": 0.3509946818987591, "grad_norm": 1.5742462873458862, "learning_rate": 0.00015065483400285716, "loss": 1.6867, "step": 9801 }, { "epoch": 0.3510304940283274, "grad_norm": 1.916930079460144, "learning_rate": 0.00015064483285906052, "loss": 1.4477, "step": 9802 }, { "epoch": 0.3510663061578957, "grad_norm": 1.2710362672805786, "learning_rate": 0.00015063483103390296, "loss": 1.4712, "step": 9803 }, { "epoch": 0.35110211828746396, "grad_norm": 1.4401026964187622, "learning_rate": 0.00015062482852751908, "loss": 1.4466, "step": 9804 }, { "epoch": 0.35113793041703223, "grad_norm": 1.357136845588684, "learning_rate": 0.0001506148253400434, "loss": 1.4442, "step": 9805 }, { "epoch": 0.35117374254660055, "grad_norm": 1.5309224128723145, "learning_rate": 0.00015060482147161058, "loss": 1.4533, "step": 9806 }, { "epoch": 0.3512095546761688, "grad_norm": 1.6512397527694702, "learning_rate": 0.00015059481692235514, "loss": 1.596, "step": 9807 }, { "epoch": 0.3512453668057371, "grad_norm": 1.5348283052444458, "learning_rate": 0.0001505848116924117, "loss": 1.4335, "step": 9808 }, { "epoch": 0.35128117893530536, "grad_norm": 1.6342748403549194, "learning_rate": 0.00015057480578191485, "loss": 1.1626, "step": 9809 }, { "epoch": 0.3513169910648737, "grad_norm": 1.6329182386398315, "learning_rate": 0.00015056479919099927, "loss": 1.5846, "step": 9810 }, { "epoch": 0.35135280319444195, "grad_norm": 1.609890341758728, "learning_rate": 0.0001505547919197995, "loss": 1.5383, "step": 9811 }, { "epoch": 0.3513886153240102, "grad_norm": 1.6261221170425415, "learning_rate": 0.00015054478396845026, "loss": 1.6088, "step": 9812 }, { "epoch": 0.35142442745357855, "grad_norm": 1.727931261062622, "learning_rate": 0.00015053477533708608, "loss": 1.6369, "step": 9813 }, { "epoch": 0.3514602395831468, "grad_norm": 1.760061264038086, "learning_rate": 0.00015052476602584177, "loss": 1.5403, "step": 9814 }, { "epoch": 0.3514960517127151, "grad_norm": 1.8632763624191284, "learning_rate": 0.00015051475603485183, "loss": 1.1816, "step": 9815 }, { "epoch": 0.35153186384228335, "grad_norm": 1.4889694452285767, "learning_rate": 0.00015050474536425101, "loss": 1.5174, "step": 9816 }, { "epoch": 0.3515676759718517, "grad_norm": 1.9194355010986328, "learning_rate": 0.00015049473401417403, "loss": 1.6779, "step": 9817 }, { "epoch": 0.35160348810141995, "grad_norm": 1.6100517511367798, "learning_rate": 0.00015048472198475553, "loss": 1.5647, "step": 9818 }, { "epoch": 0.3516393002309882, "grad_norm": 1.5931763648986816, "learning_rate": 0.00015047470927613018, "loss": 1.6965, "step": 9819 }, { "epoch": 0.35167511236055654, "grad_norm": 1.833192229270935, "learning_rate": 0.00015046469588843276, "loss": 1.5959, "step": 9820 }, { "epoch": 0.3517109244901248, "grad_norm": 2.009188175201416, "learning_rate": 0.00015045468182179795, "loss": 1.8723, "step": 9821 }, { "epoch": 0.3517467366196931, "grad_norm": 1.9709876775741577, "learning_rate": 0.0001504446670763605, "loss": 1.677, "step": 9822 }, { "epoch": 0.35178254874926135, "grad_norm": 1.6057246923446655, "learning_rate": 0.0001504346516522551, "loss": 1.7295, "step": 9823 }, { "epoch": 0.3518183608788297, "grad_norm": 1.5857231616973877, "learning_rate": 0.00015042463554961648, "loss": 1.4851, "step": 9824 }, { "epoch": 0.35185417300839794, "grad_norm": 2.4867377281188965, "learning_rate": 0.0001504146187685795, "loss": 1.5757, "step": 9825 }, { "epoch": 0.3518899851379662, "grad_norm": 1.5649453401565552, "learning_rate": 0.0001504046013092788, "loss": 1.2162, "step": 9826 }, { "epoch": 0.35192579726753453, "grad_norm": 1.9253320693969727, "learning_rate": 0.00015039458317184923, "loss": 1.6233, "step": 9827 }, { "epoch": 0.3519616093971028, "grad_norm": 1.2841295003890991, "learning_rate": 0.00015038456435642554, "loss": 1.5658, "step": 9828 }, { "epoch": 0.3519974215266711, "grad_norm": 1.5589649677276611, "learning_rate": 0.0001503745448631425, "loss": 1.6228, "step": 9829 }, { "epoch": 0.35203323365623934, "grad_norm": 2.096522808074951, "learning_rate": 0.00015036452469213504, "loss": 1.5682, "step": 9830 }, { "epoch": 0.35206904578580767, "grad_norm": 1.6603925228118896, "learning_rate": 0.00015035450384353775, "loss": 1.6372, "step": 9831 }, { "epoch": 0.35210485791537594, "grad_norm": 1.6871788501739502, "learning_rate": 0.0001503444823174856, "loss": 1.7357, "step": 9832 }, { "epoch": 0.3521406700449442, "grad_norm": 1.7978812456130981, "learning_rate": 0.0001503344601141134, "loss": 1.3659, "step": 9833 }, { "epoch": 0.35217648217451253, "grad_norm": 1.4551135301589966, "learning_rate": 0.00015032443723355597, "loss": 1.5676, "step": 9834 }, { "epoch": 0.3522122943040808, "grad_norm": 2.247584819793701, "learning_rate": 0.0001503144136759481, "loss": 1.3566, "step": 9835 }, { "epoch": 0.35224810643364907, "grad_norm": 1.6647343635559082, "learning_rate": 0.00015030438944142475, "loss": 1.5347, "step": 9836 }, { "epoch": 0.35228391856321734, "grad_norm": 1.3498740196228027, "learning_rate": 0.0001502943645301207, "loss": 1.241, "step": 9837 }, { "epoch": 0.35231973069278566, "grad_norm": 1.8701205253601074, "learning_rate": 0.00015028433894217087, "loss": 1.4011, "step": 9838 }, { "epoch": 0.35235554282235393, "grad_norm": 1.4306402206420898, "learning_rate": 0.0001502743126777101, "loss": 1.493, "step": 9839 }, { "epoch": 0.3523913549519222, "grad_norm": 1.9978787899017334, "learning_rate": 0.0001502642857368733, "loss": 1.591, "step": 9840 }, { "epoch": 0.3524271670814905, "grad_norm": 1.7107425928115845, "learning_rate": 0.00015025425811979542, "loss": 1.2661, "step": 9841 }, { "epoch": 0.3524629792110588, "grad_norm": 1.2457308769226074, "learning_rate": 0.00015024422982661125, "loss": 1.5245, "step": 9842 }, { "epoch": 0.35249879134062706, "grad_norm": 1.4251497983932495, "learning_rate": 0.00015023420085745584, "loss": 1.5731, "step": 9843 }, { "epoch": 0.35253460347019533, "grad_norm": 2.0676839351654053, "learning_rate": 0.00015022417121246398, "loss": 1.4724, "step": 9844 }, { "epoch": 0.35257041559976365, "grad_norm": 1.7826956510543823, "learning_rate": 0.00015021414089177077, "loss": 1.7058, "step": 9845 }, { "epoch": 0.3526062277293319, "grad_norm": 1.7209420204162598, "learning_rate": 0.000150204109895511, "loss": 1.5886, "step": 9846 }, { "epoch": 0.3526420398589002, "grad_norm": 1.6267695426940918, "learning_rate": 0.00015019407822381973, "loss": 1.4277, "step": 9847 }, { "epoch": 0.3526778519884685, "grad_norm": 1.7787023782730103, "learning_rate": 0.00015018404587683186, "loss": 1.7895, "step": 9848 }, { "epoch": 0.3527136641180368, "grad_norm": 1.4573239088058472, "learning_rate": 0.0001501740128546824, "loss": 1.278, "step": 9849 }, { "epoch": 0.35274947624760505, "grad_norm": 1.3814666271209717, "learning_rate": 0.00015016397915750633, "loss": 1.2868, "step": 9850 }, { "epoch": 0.3527852883771733, "grad_norm": 2.118358850479126, "learning_rate": 0.0001501539447854386, "loss": 1.3829, "step": 9851 }, { "epoch": 0.35282110050674165, "grad_norm": 1.9069745540618896, "learning_rate": 0.00015014390973861424, "loss": 1.7754, "step": 9852 }, { "epoch": 0.3528569126363099, "grad_norm": 2.0276107788085938, "learning_rate": 0.00015013387401716823, "loss": 1.4873, "step": 9853 }, { "epoch": 0.3528927247658782, "grad_norm": 2.06880784034729, "learning_rate": 0.00015012383762123566, "loss": 1.729, "step": 9854 }, { "epoch": 0.3529285368954465, "grad_norm": 2.1052966117858887, "learning_rate": 0.0001501138005509515, "loss": 1.46, "step": 9855 }, { "epoch": 0.3529643490250148, "grad_norm": 1.9557901620864868, "learning_rate": 0.00015010376280645077, "loss": 1.4189, "step": 9856 }, { "epoch": 0.35300016115458305, "grad_norm": 1.9544841051101685, "learning_rate": 0.00015009372438786858, "loss": 1.7716, "step": 9857 }, { "epoch": 0.3530359732841513, "grad_norm": 1.9688549041748047, "learning_rate": 0.00015008368529533992, "loss": 1.7624, "step": 9858 }, { "epoch": 0.35307178541371964, "grad_norm": 1.3620836734771729, "learning_rate": 0.00015007364552899988, "loss": 1.6062, "step": 9859 }, { "epoch": 0.3531075975432879, "grad_norm": 1.8967136144638062, "learning_rate": 0.0001500636050889835, "loss": 1.1698, "step": 9860 }, { "epoch": 0.3531434096728562, "grad_norm": 1.6941111087799072, "learning_rate": 0.00015005356397542597, "loss": 1.7253, "step": 9861 }, { "epoch": 0.3531792218024245, "grad_norm": 1.463535189628601, "learning_rate": 0.00015004352218846222, "loss": 1.5704, "step": 9862 }, { "epoch": 0.3532150339319928, "grad_norm": 2.1995041370391846, "learning_rate": 0.00015003347972822746, "loss": 1.7306, "step": 9863 }, { "epoch": 0.35325084606156104, "grad_norm": 1.662667155265808, "learning_rate": 0.00015002343659485678, "loss": 1.6146, "step": 9864 }, { "epoch": 0.3532866581911293, "grad_norm": 1.8545045852661133, "learning_rate": 0.0001500133927884853, "loss": 1.7385, "step": 9865 }, { "epoch": 0.35332247032069763, "grad_norm": 1.3238962888717651, "learning_rate": 0.0001500033483092481, "loss": 1.477, "step": 9866 }, { "epoch": 0.3533582824502659, "grad_norm": 2.3159918785095215, "learning_rate": 0.00014999330315728035, "loss": 1.4599, "step": 9867 }, { "epoch": 0.3533940945798342, "grad_norm": 1.7593953609466553, "learning_rate": 0.00014998325733271722, "loss": 1.3809, "step": 9868 }, { "epoch": 0.3534299067094025, "grad_norm": 1.9399266242980957, "learning_rate": 0.0001499732108356938, "loss": 1.5886, "step": 9869 }, { "epoch": 0.35346571883897077, "grad_norm": 1.7394417524337769, "learning_rate": 0.00014996316366634532, "loss": 1.4151, "step": 9870 }, { "epoch": 0.35350153096853904, "grad_norm": 2.505788564682007, "learning_rate": 0.0001499531158248069, "loss": 1.7199, "step": 9871 }, { "epoch": 0.3535373430981073, "grad_norm": 1.7400318384170532, "learning_rate": 0.00014994306731121374, "loss": 1.7993, "step": 9872 }, { "epoch": 0.35357315522767563, "grad_norm": 1.9016368389129639, "learning_rate": 0.00014993301812570104, "loss": 1.54, "step": 9873 }, { "epoch": 0.3536089673572439, "grad_norm": 1.5940747261047363, "learning_rate": 0.00014992296826840402, "loss": 1.4284, "step": 9874 }, { "epoch": 0.35364477948681217, "grad_norm": 2.1987946033477783, "learning_rate": 0.00014991291773945782, "loss": 1.4855, "step": 9875 }, { "epoch": 0.3536805916163805, "grad_norm": 1.4558799266815186, "learning_rate": 0.0001499028665389977, "loss": 1.8473, "step": 9876 }, { "epoch": 0.35371640374594876, "grad_norm": 1.5671563148498535, "learning_rate": 0.00014989281466715887, "loss": 1.5182, "step": 9877 }, { "epoch": 0.35375221587551703, "grad_norm": 1.395065188407898, "learning_rate": 0.0001498827621240766, "loss": 1.3509, "step": 9878 }, { "epoch": 0.3537880280050853, "grad_norm": 1.8056888580322266, "learning_rate": 0.0001498727089098861, "loss": 1.779, "step": 9879 }, { "epoch": 0.3538238401346536, "grad_norm": 1.9839811325073242, "learning_rate": 0.00014986265502472262, "loss": 1.813, "step": 9880 }, { "epoch": 0.3538596522642219, "grad_norm": 1.4838645458221436, "learning_rate": 0.00014985260046872145, "loss": 1.1724, "step": 9881 }, { "epoch": 0.35389546439379016, "grad_norm": 1.5873481035232544, "learning_rate": 0.00014984254524201784, "loss": 1.3684, "step": 9882 }, { "epoch": 0.3539312765233585, "grad_norm": 1.2479912042617798, "learning_rate": 0.0001498324893447471, "loss": 1.1336, "step": 9883 }, { "epoch": 0.35396708865292675, "grad_norm": 1.6061489582061768, "learning_rate": 0.00014982243277704446, "loss": 1.5485, "step": 9884 }, { "epoch": 0.354002900782495, "grad_norm": 2.5165042877197266, "learning_rate": 0.0001498123755390453, "loss": 1.4389, "step": 9885 }, { "epoch": 0.3540387129120633, "grad_norm": 2.0060083866119385, "learning_rate": 0.00014980231763088482, "loss": 1.5283, "step": 9886 }, { "epoch": 0.3540745250416316, "grad_norm": 1.2914835214614868, "learning_rate": 0.00014979225905269842, "loss": 1.4375, "step": 9887 }, { "epoch": 0.3541103371711999, "grad_norm": 1.3552098274230957, "learning_rate": 0.0001497821998046214, "loss": 1.4385, "step": 9888 }, { "epoch": 0.35414614930076815, "grad_norm": 1.729246735572815, "learning_rate": 0.0001497721398867891, "loss": 1.4477, "step": 9889 }, { "epoch": 0.3541819614303365, "grad_norm": 1.430737018585205, "learning_rate": 0.00014976207929933688, "loss": 1.6311, "step": 9890 }, { "epoch": 0.35421777355990475, "grad_norm": 1.8276435136795044, "learning_rate": 0.00014975201804240005, "loss": 1.6353, "step": 9891 }, { "epoch": 0.354253585689473, "grad_norm": 1.814315915107727, "learning_rate": 0.00014974195611611402, "loss": 1.9347, "step": 9892 }, { "epoch": 0.3542893978190413, "grad_norm": 1.468151330947876, "learning_rate": 0.00014973189352061409, "loss": 1.3659, "step": 9893 }, { "epoch": 0.3543252099486096, "grad_norm": 2.0722012519836426, "learning_rate": 0.0001497218302560357, "loss": 1.5477, "step": 9894 }, { "epoch": 0.3543610220781779, "grad_norm": 1.7138365507125854, "learning_rate": 0.0001497117663225142, "loss": 1.6164, "step": 9895 }, { "epoch": 0.35439683420774615, "grad_norm": 1.5745422840118408, "learning_rate": 0.00014970170172018505, "loss": 1.566, "step": 9896 }, { "epoch": 0.35443264633731447, "grad_norm": 2.2554714679718018, "learning_rate": 0.00014969163644918358, "loss": 1.4819, "step": 9897 }, { "epoch": 0.35446845846688274, "grad_norm": 1.376289963722229, "learning_rate": 0.00014968157050964526, "loss": 1.4859, "step": 9898 }, { "epoch": 0.354504270596451, "grad_norm": 1.6688337326049805, "learning_rate": 0.00014967150390170547, "loss": 1.5748, "step": 9899 }, { "epoch": 0.3545400827260193, "grad_norm": 2.0181753635406494, "learning_rate": 0.0001496614366254997, "loss": 1.37, "step": 9900 }, { "epoch": 0.3545758948555876, "grad_norm": 1.6945894956588745, "learning_rate": 0.00014965136868116334, "loss": 1.5793, "step": 9901 }, { "epoch": 0.3546117069851559, "grad_norm": 1.9368959665298462, "learning_rate": 0.00014964130006883187, "loss": 1.6875, "step": 9902 }, { "epoch": 0.35464751911472414, "grad_norm": 1.573068380355835, "learning_rate": 0.00014963123078864073, "loss": 1.5334, "step": 9903 }, { "epoch": 0.35468333124429247, "grad_norm": 1.3953185081481934, "learning_rate": 0.0001496211608407254, "loss": 1.6489, "step": 9904 }, { "epoch": 0.35471914337386073, "grad_norm": 2.508678674697876, "learning_rate": 0.00014961109022522135, "loss": 1.9514, "step": 9905 }, { "epoch": 0.354754955503429, "grad_norm": 2.420624256134033, "learning_rate": 0.0001496010189422641, "loss": 1.4283, "step": 9906 }, { "epoch": 0.3547907676329973, "grad_norm": 1.3854237794876099, "learning_rate": 0.00014959094699198907, "loss": 1.2818, "step": 9907 }, { "epoch": 0.3548265797625656, "grad_norm": 1.5231170654296875, "learning_rate": 0.00014958087437453186, "loss": 1.4811, "step": 9908 }, { "epoch": 0.35486239189213387, "grad_norm": 1.3888393640518188, "learning_rate": 0.00014957080109002794, "loss": 1.406, "step": 9909 }, { "epoch": 0.35489820402170213, "grad_norm": 2.292565107345581, "learning_rate": 0.00014956072713861286, "loss": 1.6349, "step": 9910 }, { "epoch": 0.35493401615127046, "grad_norm": 2.0095014572143555, "learning_rate": 0.00014955065252042206, "loss": 1.6491, "step": 9911 }, { "epoch": 0.35496982828083873, "grad_norm": 2.346771240234375, "learning_rate": 0.00014954057723559115, "loss": 1.8228, "step": 9912 }, { "epoch": 0.355005640410407, "grad_norm": 1.9204994440078735, "learning_rate": 0.0001495305012842557, "loss": 1.6642, "step": 9913 }, { "epoch": 0.35504145253997527, "grad_norm": 1.8857648372650146, "learning_rate": 0.00014952042466655126, "loss": 1.5507, "step": 9914 }, { "epoch": 0.3550772646695436, "grad_norm": 1.6578903198242188, "learning_rate": 0.00014951034738261337, "loss": 1.7453, "step": 9915 }, { "epoch": 0.35511307679911186, "grad_norm": 1.5523202419281006, "learning_rate": 0.00014950026943257762, "loss": 1.6968, "step": 9916 }, { "epoch": 0.35514888892868013, "grad_norm": 2.2549266815185547, "learning_rate": 0.00014949019081657959, "loss": 1.512, "step": 9917 }, { "epoch": 0.35518470105824845, "grad_norm": 2.5637290477752686, "learning_rate": 0.00014948011153475491, "loss": 1.7135, "step": 9918 }, { "epoch": 0.3552205131878167, "grad_norm": 1.7439630031585693, "learning_rate": 0.0001494700315872391, "loss": 1.5241, "step": 9919 }, { "epoch": 0.355256325317385, "grad_norm": 2.2923781871795654, "learning_rate": 0.00014945995097416788, "loss": 1.5614, "step": 9920 }, { "epoch": 0.35529213744695326, "grad_norm": 1.4263309240341187, "learning_rate": 0.0001494498696956768, "loss": 1.618, "step": 9921 }, { "epoch": 0.3553279495765216, "grad_norm": 1.650952935218811, "learning_rate": 0.0001494397877519015, "loss": 1.531, "step": 9922 }, { "epoch": 0.35536376170608985, "grad_norm": 1.950554609298706, "learning_rate": 0.00014942970514297761, "loss": 1.3641, "step": 9923 }, { "epoch": 0.3553995738356581, "grad_norm": 1.464562177658081, "learning_rate": 0.00014941962186904083, "loss": 1.508, "step": 9924 }, { "epoch": 0.35543538596522645, "grad_norm": 1.9498792886734009, "learning_rate": 0.00014940953793022676, "loss": 1.7068, "step": 9925 }, { "epoch": 0.3554711980947947, "grad_norm": 2.579939126968384, "learning_rate": 0.00014939945332667108, "loss": 1.5399, "step": 9926 }, { "epoch": 0.355507010224363, "grad_norm": 1.616997480392456, "learning_rate": 0.00014938936805850955, "loss": 1.2796, "step": 9927 }, { "epoch": 0.35554282235393125, "grad_norm": 1.5822079181671143, "learning_rate": 0.0001493792821258777, "loss": 1.5981, "step": 9928 }, { "epoch": 0.3555786344834996, "grad_norm": 1.6461950540542603, "learning_rate": 0.00014936919552891134, "loss": 1.2901, "step": 9929 }, { "epoch": 0.35561444661306785, "grad_norm": 1.615821123123169, "learning_rate": 0.00014935910826774612, "loss": 1.7623, "step": 9930 }, { "epoch": 0.3556502587426361, "grad_norm": 2.009953260421753, "learning_rate": 0.0001493490203425178, "loss": 1.4007, "step": 9931 }, { "epoch": 0.35568607087220444, "grad_norm": 1.6070547103881836, "learning_rate": 0.00014933893175336202, "loss": 1.6903, "step": 9932 }, { "epoch": 0.3557218830017727, "grad_norm": 1.6759109497070312, "learning_rate": 0.0001493288425004146, "loss": 1.5817, "step": 9933 }, { "epoch": 0.355757695131341, "grad_norm": 2.2906291484832764, "learning_rate": 0.00014931875258381117, "loss": 1.7121, "step": 9934 }, { "epoch": 0.35579350726090925, "grad_norm": 1.4188705682754517, "learning_rate": 0.00014930866200368761, "loss": 1.81, "step": 9935 }, { "epoch": 0.35582931939047757, "grad_norm": 2.0052592754364014, "learning_rate": 0.0001492985707601796, "loss": 1.5919, "step": 9936 }, { "epoch": 0.35586513152004584, "grad_norm": 1.7482327222824097, "learning_rate": 0.00014928847885342287, "loss": 1.4017, "step": 9937 }, { "epoch": 0.3559009436496141, "grad_norm": 1.6666542291641235, "learning_rate": 0.00014927838628355327, "loss": 1.3715, "step": 9938 }, { "epoch": 0.35593675577918243, "grad_norm": 1.207485556602478, "learning_rate": 0.0001492682930507065, "loss": 1.3941, "step": 9939 }, { "epoch": 0.3559725679087507, "grad_norm": 1.869559645652771, "learning_rate": 0.00014925819915501847, "loss": 1.7862, "step": 9940 }, { "epoch": 0.356008380038319, "grad_norm": 1.9369616508483887, "learning_rate": 0.00014924810459662484, "loss": 1.6843, "step": 9941 }, { "epoch": 0.35604419216788724, "grad_norm": 2.2861275672912598, "learning_rate": 0.0001492380093756615, "loss": 1.3355, "step": 9942 }, { "epoch": 0.35608000429745557, "grad_norm": 1.4812921285629272, "learning_rate": 0.0001492279134922643, "loss": 1.4724, "step": 9943 }, { "epoch": 0.35611581642702383, "grad_norm": 1.890170931816101, "learning_rate": 0.000149217816946569, "loss": 1.403, "step": 9944 }, { "epoch": 0.3561516285565921, "grad_norm": 1.7531453371047974, "learning_rate": 0.00014920771973871147, "loss": 1.8111, "step": 9945 }, { "epoch": 0.35618744068616043, "grad_norm": 2.1455671787261963, "learning_rate": 0.00014919762186882754, "loss": 1.6263, "step": 9946 }, { "epoch": 0.3562232528157287, "grad_norm": 1.9717743396759033, "learning_rate": 0.00014918752333705303, "loss": 1.1631, "step": 9947 }, { "epoch": 0.35625906494529697, "grad_norm": 1.4446394443511963, "learning_rate": 0.00014917742414352386, "loss": 1.4625, "step": 9948 }, { "epoch": 0.35629487707486523, "grad_norm": 1.9484100341796875, "learning_rate": 0.00014916732428837593, "loss": 1.8046, "step": 9949 }, { "epoch": 0.35633068920443356, "grad_norm": 1.383619785308838, "learning_rate": 0.00014915722377174503, "loss": 1.3816, "step": 9950 }, { "epoch": 0.35636650133400183, "grad_norm": 1.6241815090179443, "learning_rate": 0.0001491471225937671, "loss": 1.5867, "step": 9951 }, { "epoch": 0.3564023134635701, "grad_norm": 1.8264425992965698, "learning_rate": 0.000149137020754578, "loss": 1.6515, "step": 9952 }, { "epoch": 0.3564381255931384, "grad_norm": 1.544845700263977, "learning_rate": 0.0001491269182543137, "loss": 1.5767, "step": 9953 }, { "epoch": 0.3564739377227067, "grad_norm": 2.761215925216675, "learning_rate": 0.0001491168150931101, "loss": 1.3875, "step": 9954 }, { "epoch": 0.35650974985227496, "grad_norm": 2.150071859359741, "learning_rate": 0.00014910671127110308, "loss": 1.3972, "step": 9955 }, { "epoch": 0.35654556198184323, "grad_norm": 1.8057886362075806, "learning_rate": 0.00014909660678842862, "loss": 1.4971, "step": 9956 }, { "epoch": 0.35658137411141155, "grad_norm": 1.5841161012649536, "learning_rate": 0.0001490865016452226, "loss": 1.6912, "step": 9957 }, { "epoch": 0.3566171862409798, "grad_norm": 1.4754817485809326, "learning_rate": 0.00014907639584162109, "loss": 1.6008, "step": 9958 }, { "epoch": 0.3566529983705481, "grad_norm": 1.5032598972320557, "learning_rate": 0.00014906628937775995, "loss": 1.6716, "step": 9959 }, { "epoch": 0.3566888105001164, "grad_norm": 1.5778634548187256, "learning_rate": 0.00014905618225377517, "loss": 1.2682, "step": 9960 }, { "epoch": 0.3567246226296847, "grad_norm": 2.124629259109497, "learning_rate": 0.00014904607446980273, "loss": 1.4935, "step": 9961 }, { "epoch": 0.35676043475925295, "grad_norm": 1.7733240127563477, "learning_rate": 0.00014903596602597864, "loss": 1.8838, "step": 9962 }, { "epoch": 0.3567962468888212, "grad_norm": 2.439082622528076, "learning_rate": 0.00014902585692243885, "loss": 1.5614, "step": 9963 }, { "epoch": 0.35683205901838955, "grad_norm": 1.8383203744888306, "learning_rate": 0.00014901574715931942, "loss": 1.386, "step": 9964 }, { "epoch": 0.3568678711479578, "grad_norm": 1.448671579360962, "learning_rate": 0.00014900563673675633, "loss": 1.3934, "step": 9965 }, { "epoch": 0.3569036832775261, "grad_norm": 1.9962146282196045, "learning_rate": 0.00014899552565488563, "loss": 1.7006, "step": 9966 }, { "epoch": 0.3569394954070944, "grad_norm": 1.699221134185791, "learning_rate": 0.0001489854139138433, "loss": 1.5871, "step": 9967 }, { "epoch": 0.3569753075366627, "grad_norm": 1.5800422430038452, "learning_rate": 0.00014897530151376545, "loss": 1.6377, "step": 9968 }, { "epoch": 0.35701111966623095, "grad_norm": 1.6919838190078735, "learning_rate": 0.00014896518845478805, "loss": 1.424, "step": 9969 }, { "epoch": 0.3570469317957992, "grad_norm": 1.6668004989624023, "learning_rate": 0.00014895507473704718, "loss": 1.6673, "step": 9970 }, { "epoch": 0.35708274392536754, "grad_norm": 1.9433567523956299, "learning_rate": 0.00014894496036067903, "loss": 1.3958, "step": 9971 }, { "epoch": 0.3571185560549358, "grad_norm": 2.3744451999664307, "learning_rate": 0.00014893484532581947, "loss": 1.5432, "step": 9972 }, { "epoch": 0.3571543681845041, "grad_norm": 1.429279088973999, "learning_rate": 0.00014892472963260475, "loss": 1.9036, "step": 9973 }, { "epoch": 0.3571901803140724, "grad_norm": 1.7184338569641113, "learning_rate": 0.00014891461328117087, "loss": 1.3598, "step": 9974 }, { "epoch": 0.35722599244364067, "grad_norm": 1.5399808883666992, "learning_rate": 0.00014890449627165398, "loss": 1.5129, "step": 9975 }, { "epoch": 0.35726180457320894, "grad_norm": 1.5570214986801147, "learning_rate": 0.00014889437860419013, "loss": 1.6888, "step": 9976 }, { "epoch": 0.3572976167027772, "grad_norm": 1.7206803560256958, "learning_rate": 0.00014888426027891553, "loss": 1.6131, "step": 9977 }, { "epoch": 0.35733342883234553, "grad_norm": 1.5917768478393555, "learning_rate": 0.00014887414129596623, "loss": 1.6743, "step": 9978 }, { "epoch": 0.3573692409619138, "grad_norm": 1.8771898746490479, "learning_rate": 0.00014886402165547845, "loss": 1.4912, "step": 9979 }, { "epoch": 0.3574050530914821, "grad_norm": 1.189568281173706, "learning_rate": 0.00014885390135758826, "loss": 1.4007, "step": 9980 }, { "epoch": 0.3574408652210504, "grad_norm": 1.9327512979507446, "learning_rate": 0.00014884378040243184, "loss": 1.6163, "step": 9981 }, { "epoch": 0.35747667735061867, "grad_norm": 1.1493306159973145, "learning_rate": 0.0001488336587901454, "loss": 1.4209, "step": 9982 }, { "epoch": 0.35751248948018693, "grad_norm": 1.4550294876098633, "learning_rate": 0.000148823536520865, "loss": 1.3286, "step": 9983 }, { "epoch": 0.3575483016097552, "grad_norm": 1.721457600593567, "learning_rate": 0.00014881341359472696, "loss": 1.6156, "step": 9984 }, { "epoch": 0.35758411373932353, "grad_norm": 1.9905215501785278, "learning_rate": 0.00014880329001186736, "loss": 1.5031, "step": 9985 }, { "epoch": 0.3576199258688918, "grad_norm": 1.5918910503387451, "learning_rate": 0.00014879316577242246, "loss": 1.4491, "step": 9986 }, { "epoch": 0.35765573799846007, "grad_norm": 2.3297674655914307, "learning_rate": 0.00014878304087652847, "loss": 1.5093, "step": 9987 }, { "epoch": 0.3576915501280284, "grad_norm": 1.8075636625289917, "learning_rate": 0.00014877291532432158, "loss": 1.4857, "step": 9988 }, { "epoch": 0.35772736225759666, "grad_norm": 1.7248746156692505, "learning_rate": 0.00014876278911593802, "loss": 1.6853, "step": 9989 }, { "epoch": 0.35776317438716493, "grad_norm": 1.792223334312439, "learning_rate": 0.00014875266225151403, "loss": 1.2533, "step": 9990 }, { "epoch": 0.3577989865167332, "grad_norm": 1.5456364154815674, "learning_rate": 0.00014874253473118586, "loss": 1.4788, "step": 9991 }, { "epoch": 0.3578347986463015, "grad_norm": 1.5730777978897095, "learning_rate": 0.00014873240655508975, "loss": 1.4009, "step": 9992 }, { "epoch": 0.3578706107758698, "grad_norm": 1.5028215646743774, "learning_rate": 0.00014872227772336197, "loss": 1.5448, "step": 9993 }, { "epoch": 0.35790642290543806, "grad_norm": 1.584532618522644, "learning_rate": 0.0001487121482361388, "loss": 1.4787, "step": 9994 }, { "epoch": 0.3579422350350064, "grad_norm": 1.7555292844772339, "learning_rate": 0.00014870201809355653, "loss": 1.3804, "step": 9995 }, { "epoch": 0.35797804716457465, "grad_norm": 1.8509167432785034, "learning_rate": 0.0001486918872957514, "loss": 1.4473, "step": 9996 }, { "epoch": 0.3580138592941429, "grad_norm": 1.639758825302124, "learning_rate": 0.00014868175584285974, "loss": 1.4596, "step": 9997 }, { "epoch": 0.3580496714237112, "grad_norm": 1.8261754512786865, "learning_rate": 0.00014867162373501786, "loss": 1.6244, "step": 9998 }, { "epoch": 0.3580854835532795, "grad_norm": 1.5407792329788208, "learning_rate": 0.00014866149097236204, "loss": 1.4145, "step": 9999 }, { "epoch": 0.3581212956828478, "grad_norm": 1.653843879699707, "learning_rate": 0.00014865135755502866, "loss": 1.3846, "step": 10000 }, { "epoch": 0.35815710781241605, "grad_norm": 1.4592039585113525, "learning_rate": 0.000148641223483154, "loss": 1.3947, "step": 10001 }, { "epoch": 0.3581929199419843, "grad_norm": 1.936527132987976, "learning_rate": 0.00014863108875687444, "loss": 1.2548, "step": 10002 }, { "epoch": 0.35822873207155265, "grad_norm": 1.7076057195663452, "learning_rate": 0.00014862095337632626, "loss": 1.3316, "step": 10003 }, { "epoch": 0.3582645442011209, "grad_norm": 1.6434259414672852, "learning_rate": 0.00014861081734164592, "loss": 1.6868, "step": 10004 }, { "epoch": 0.3583003563306892, "grad_norm": 1.6703202724456787, "learning_rate": 0.0001486006806529697, "loss": 1.4944, "step": 10005 }, { "epoch": 0.3583361684602575, "grad_norm": 1.4541386365890503, "learning_rate": 0.00014859054331043406, "loss": 1.6077, "step": 10006 }, { "epoch": 0.3583719805898258, "grad_norm": 1.5497562885284424, "learning_rate": 0.0001485804053141753, "loss": 1.5403, "step": 10007 }, { "epoch": 0.35840779271939405, "grad_norm": 1.54259192943573, "learning_rate": 0.00014857026666432988, "loss": 1.5234, "step": 10008 }, { "epoch": 0.3584436048489623, "grad_norm": 1.5208549499511719, "learning_rate": 0.00014856012736103413, "loss": 1.6961, "step": 10009 }, { "epoch": 0.35847941697853064, "grad_norm": 1.6700527667999268, "learning_rate": 0.00014854998740442454, "loss": 1.5871, "step": 10010 }, { "epoch": 0.3585152291080989, "grad_norm": 1.868094563484192, "learning_rate": 0.00014853984679463747, "loss": 1.6691, "step": 10011 }, { "epoch": 0.3585510412376672, "grad_norm": 2.0113749504089355, "learning_rate": 0.00014852970553180938, "loss": 1.1609, "step": 10012 }, { "epoch": 0.3585868533672355, "grad_norm": 1.3860893249511719, "learning_rate": 0.0001485195636160767, "loss": 1.4034, "step": 10013 }, { "epoch": 0.35862266549680377, "grad_norm": 2.6419312953948975, "learning_rate": 0.00014850942104757588, "loss": 1.5761, "step": 10014 }, { "epoch": 0.35865847762637204, "grad_norm": 1.6753020286560059, "learning_rate": 0.0001484992778264434, "loss": 1.5387, "step": 10015 }, { "epoch": 0.3586942897559403, "grad_norm": 1.7237919569015503, "learning_rate": 0.00014848913395281568, "loss": 1.4762, "step": 10016 }, { "epoch": 0.35873010188550863, "grad_norm": 1.547621488571167, "learning_rate": 0.00014847898942682922, "loss": 1.5384, "step": 10017 }, { "epoch": 0.3587659140150769, "grad_norm": 1.5596733093261719, "learning_rate": 0.00014846884424862044, "loss": 1.4112, "step": 10018 }, { "epoch": 0.35880172614464517, "grad_norm": 1.4373083114624023, "learning_rate": 0.0001484586984183259, "loss": 1.3855, "step": 10019 }, { "epoch": 0.3588375382742135, "grad_norm": 1.8072415590286255, "learning_rate": 0.0001484485519360821, "loss": 1.5663, "step": 10020 }, { "epoch": 0.35887335040378177, "grad_norm": 1.838571310043335, "learning_rate": 0.00014843840480202554, "loss": 1.401, "step": 10021 }, { "epoch": 0.35890916253335003, "grad_norm": 1.517883539199829, "learning_rate": 0.00014842825701629267, "loss": 1.2617, "step": 10022 }, { "epoch": 0.3589449746629183, "grad_norm": 1.988364577293396, "learning_rate": 0.0001484181085790201, "loss": 1.478, "step": 10023 }, { "epoch": 0.35898078679248663, "grad_norm": 1.5956732034683228, "learning_rate": 0.00014840795949034439, "loss": 1.5329, "step": 10024 }, { "epoch": 0.3590165989220549, "grad_norm": 1.7661099433898926, "learning_rate": 0.00014839780975040194, "loss": 1.465, "step": 10025 }, { "epoch": 0.35905241105162317, "grad_norm": 1.3681966066360474, "learning_rate": 0.00014838765935932944, "loss": 1.5103, "step": 10026 }, { "epoch": 0.3590882231811915, "grad_norm": 2.267117977142334, "learning_rate": 0.00014837750831726338, "loss": 1.52, "step": 10027 }, { "epoch": 0.35912403531075976, "grad_norm": 1.8167510032653809, "learning_rate": 0.00014836735662434035, "loss": 1.6152, "step": 10028 }, { "epoch": 0.35915984744032803, "grad_norm": 1.5171904563903809, "learning_rate": 0.00014835720428069693, "loss": 1.0415, "step": 10029 }, { "epoch": 0.3591956595698963, "grad_norm": 1.476212739944458, "learning_rate": 0.0001483470512864697, "loss": 1.345, "step": 10030 }, { "epoch": 0.3592314716994646, "grad_norm": 1.4677760601043701, "learning_rate": 0.00014833689764179523, "loss": 1.6914, "step": 10031 }, { "epoch": 0.3592672838290329, "grad_norm": 1.542527198791504, "learning_rate": 0.00014832674334681022, "loss": 1.5375, "step": 10032 }, { "epoch": 0.35930309595860116, "grad_norm": 1.7230689525604248, "learning_rate": 0.00014831658840165117, "loss": 1.832, "step": 10033 }, { "epoch": 0.3593389080881695, "grad_norm": 2.0981647968292236, "learning_rate": 0.00014830643280645472, "loss": 1.4325, "step": 10034 }, { "epoch": 0.35937472021773775, "grad_norm": 1.4687614440917969, "learning_rate": 0.00014829627656135757, "loss": 1.5059, "step": 10035 }, { "epoch": 0.359410532347306, "grad_norm": 1.6538559198379517, "learning_rate": 0.0001482861196664963, "loss": 1.4741, "step": 10036 }, { "epoch": 0.3594463444768743, "grad_norm": 1.6689916849136353, "learning_rate": 0.00014827596212200762, "loss": 1.5735, "step": 10037 }, { "epoch": 0.3594821566064426, "grad_norm": 1.9871309995651245, "learning_rate": 0.00014826580392802806, "loss": 1.8311, "step": 10038 }, { "epoch": 0.3595179687360109, "grad_norm": 2.3612310886383057, "learning_rate": 0.00014825564508469443, "loss": 1.7536, "step": 10039 }, { "epoch": 0.35955378086557915, "grad_norm": 1.9663517475128174, "learning_rate": 0.00014824548559214332, "loss": 1.4124, "step": 10040 }, { "epoch": 0.3595895929951475, "grad_norm": 1.631361961364746, "learning_rate": 0.0001482353254505114, "loss": 1.6325, "step": 10041 }, { "epoch": 0.35962540512471575, "grad_norm": 1.6884336471557617, "learning_rate": 0.00014822516465993546, "loss": 1.4287, "step": 10042 }, { "epoch": 0.359661217254284, "grad_norm": 1.5852062702178955, "learning_rate": 0.0001482150032205521, "loss": 1.6681, "step": 10043 }, { "epoch": 0.3596970293838523, "grad_norm": 1.4541549682617188, "learning_rate": 0.00014820484113249805, "loss": 1.3463, "step": 10044 }, { "epoch": 0.3597328415134206, "grad_norm": 1.5388379096984863, "learning_rate": 0.00014819467839591007, "loss": 1.5042, "step": 10045 }, { "epoch": 0.3597686536429889, "grad_norm": 1.8870503902435303, "learning_rate": 0.00014818451501092485, "loss": 1.5489, "step": 10046 }, { "epoch": 0.35980446577255715, "grad_norm": 1.8153746128082275, "learning_rate": 0.00014817435097767912, "loss": 1.6726, "step": 10047 }, { "epoch": 0.35984027790212547, "grad_norm": 1.4867844581604004, "learning_rate": 0.00014816418629630968, "loss": 1.3771, "step": 10048 }, { "epoch": 0.35987609003169374, "grad_norm": 1.6260762214660645, "learning_rate": 0.0001481540209669532, "loss": 1.5194, "step": 10049 }, { "epoch": 0.359911902161262, "grad_norm": 1.7411034107208252, "learning_rate": 0.0001481438549897465, "loss": 1.3861, "step": 10050 }, { "epoch": 0.3599477142908303, "grad_norm": 1.4617067575454712, "learning_rate": 0.00014813368836482632, "loss": 1.1129, "step": 10051 }, { "epoch": 0.3599835264203986, "grad_norm": 2.216752767562866, "learning_rate": 0.00014812352109232947, "loss": 1.3592, "step": 10052 }, { "epoch": 0.36001933854996687, "grad_norm": 1.7082031965255737, "learning_rate": 0.0001481133531723927, "loss": 1.2952, "step": 10053 }, { "epoch": 0.36005515067953514, "grad_norm": 2.7109858989715576, "learning_rate": 0.00014810318460515282, "loss": 1.9209, "step": 10054 }, { "epoch": 0.36009096280910347, "grad_norm": 1.6055452823638916, "learning_rate": 0.00014809301539074667, "loss": 1.3236, "step": 10055 }, { "epoch": 0.36012677493867173, "grad_norm": 1.9085619449615479, "learning_rate": 0.000148082845529311, "loss": 1.6722, "step": 10056 }, { "epoch": 0.36016258706824, "grad_norm": 1.3563839197158813, "learning_rate": 0.00014807267502098267, "loss": 1.4078, "step": 10057 }, { "epoch": 0.36019839919780827, "grad_norm": 1.5918482542037964, "learning_rate": 0.00014806250386589851, "loss": 1.5898, "step": 10058 }, { "epoch": 0.3602342113273766, "grad_norm": 1.934302806854248, "learning_rate": 0.0001480523320641954, "loss": 1.288, "step": 10059 }, { "epoch": 0.36027002345694487, "grad_norm": 2.7092247009277344, "learning_rate": 0.00014804215961601008, "loss": 1.4568, "step": 10060 }, { "epoch": 0.36030583558651313, "grad_norm": 1.5964667797088623, "learning_rate": 0.00014803198652147952, "loss": 1.8112, "step": 10061 }, { "epoch": 0.36034164771608146, "grad_norm": 1.7393267154693604, "learning_rate": 0.00014802181278074052, "loss": 1.5946, "step": 10062 }, { "epoch": 0.36037745984564973, "grad_norm": 1.6845488548278809, "learning_rate": 0.00014801163839392998, "loss": 1.544, "step": 10063 }, { "epoch": 0.360413271975218, "grad_norm": 1.5113362073898315, "learning_rate": 0.00014800146336118474, "loss": 1.5542, "step": 10064 }, { "epoch": 0.36044908410478627, "grad_norm": 1.7169773578643799, "learning_rate": 0.0001479912876826418, "loss": 1.5439, "step": 10065 }, { "epoch": 0.3604848962343546, "grad_norm": 1.4633331298828125, "learning_rate": 0.0001479811113584379, "loss": 1.595, "step": 10066 }, { "epoch": 0.36052070836392286, "grad_norm": 1.579038381576538, "learning_rate": 0.00014797093438871008, "loss": 1.4934, "step": 10067 }, { "epoch": 0.36055652049349113, "grad_norm": 1.9892122745513916, "learning_rate": 0.00014796075677359525, "loss": 1.5002, "step": 10068 }, { "epoch": 0.36059233262305945, "grad_norm": 1.6243116855621338, "learning_rate": 0.00014795057851323023, "loss": 1.5465, "step": 10069 }, { "epoch": 0.3606281447526277, "grad_norm": 1.6825838088989258, "learning_rate": 0.0001479403996077521, "loss": 1.163, "step": 10070 }, { "epoch": 0.360663956882196, "grad_norm": 2.1626789569854736, "learning_rate": 0.0001479302200572977, "loss": 1.389, "step": 10071 }, { "epoch": 0.36069976901176426, "grad_norm": 1.4115447998046875, "learning_rate": 0.00014792003986200403, "loss": 1.4016, "step": 10072 }, { "epoch": 0.3607355811413326, "grad_norm": 2.0680456161499023, "learning_rate": 0.000147909859022008, "loss": 1.4026, "step": 10073 }, { "epoch": 0.36077139327090085, "grad_norm": 1.1022719144821167, "learning_rate": 0.00014789967753744664, "loss": 1.3813, "step": 10074 }, { "epoch": 0.3608072054004691, "grad_norm": 1.9899226427078247, "learning_rate": 0.00014788949540845689, "loss": 1.4066, "step": 10075 }, { "epoch": 0.36084301753003745, "grad_norm": 1.577979564666748, "learning_rate": 0.0001478793126351758, "loss": 1.5398, "step": 10076 }, { "epoch": 0.3608788296596057, "grad_norm": 1.9945513010025024, "learning_rate": 0.00014786912921774028, "loss": 1.5577, "step": 10077 }, { "epoch": 0.360914641789174, "grad_norm": 1.5704258680343628, "learning_rate": 0.00014785894515628736, "loss": 1.6951, "step": 10078 }, { "epoch": 0.36095045391874225, "grad_norm": 1.760332703590393, "learning_rate": 0.0001478487604509541, "loss": 1.4839, "step": 10079 }, { "epoch": 0.3609862660483106, "grad_norm": 1.6892656087875366, "learning_rate": 0.00014783857510187743, "loss": 1.7012, "step": 10080 }, { "epoch": 0.36102207817787885, "grad_norm": 1.9408334493637085, "learning_rate": 0.00014782838910919449, "loss": 1.3263, "step": 10081 }, { "epoch": 0.3610578903074471, "grad_norm": 1.6504887342453003, "learning_rate": 0.00014781820247304227, "loss": 1.5619, "step": 10082 }, { "epoch": 0.36109370243701544, "grad_norm": 1.608626127243042, "learning_rate": 0.00014780801519355782, "loss": 1.461, "step": 10083 }, { "epoch": 0.3611295145665837, "grad_norm": 2.068258285522461, "learning_rate": 0.00014779782727087815, "loss": 1.5741, "step": 10084 }, { "epoch": 0.361165326696152, "grad_norm": 1.8851020336151123, "learning_rate": 0.0001477876387051404, "loss": 1.66, "step": 10085 }, { "epoch": 0.36120113882572025, "grad_norm": 1.5188018083572388, "learning_rate": 0.00014777744949648163, "loss": 1.4233, "step": 10086 }, { "epoch": 0.36123695095528857, "grad_norm": 1.492539405822754, "learning_rate": 0.00014776725964503888, "loss": 1.4101, "step": 10087 }, { "epoch": 0.36127276308485684, "grad_norm": 2.47213077545166, "learning_rate": 0.00014775706915094928, "loss": 1.7695, "step": 10088 }, { "epoch": 0.3613085752144251, "grad_norm": 3.236239194869995, "learning_rate": 0.0001477468780143499, "loss": 1.3959, "step": 10089 }, { "epoch": 0.36134438734399343, "grad_norm": 1.5189967155456543, "learning_rate": 0.00014773668623537786, "loss": 1.3773, "step": 10090 }, { "epoch": 0.3613801994735617, "grad_norm": 1.4003039598464966, "learning_rate": 0.0001477264938141703, "loss": 1.6206, "step": 10091 }, { "epoch": 0.36141601160312997, "grad_norm": 1.4691357612609863, "learning_rate": 0.00014771630075086434, "loss": 1.5497, "step": 10092 }, { "epoch": 0.36145182373269824, "grad_norm": 1.6265677213668823, "learning_rate": 0.00014770610704559708, "loss": 1.7204, "step": 10093 }, { "epoch": 0.36148763586226657, "grad_norm": 1.6044228076934814, "learning_rate": 0.0001476959126985057, "loss": 1.464, "step": 10094 }, { "epoch": 0.36152344799183483, "grad_norm": 1.8812122344970703, "learning_rate": 0.00014768571770972734, "loss": 1.5653, "step": 10095 }, { "epoch": 0.3615592601214031, "grad_norm": 1.898262858390808, "learning_rate": 0.00014767552207939913, "loss": 1.2871, "step": 10096 }, { "epoch": 0.3615950722509714, "grad_norm": 1.926661729812622, "learning_rate": 0.0001476653258076583, "loss": 1.5776, "step": 10097 }, { "epoch": 0.3616308843805397, "grad_norm": 1.504490852355957, "learning_rate": 0.00014765512889464198, "loss": 1.4083, "step": 10098 }, { "epoch": 0.36166669651010797, "grad_norm": 1.455038070678711, "learning_rate": 0.00014764493134048737, "loss": 1.553, "step": 10099 }, { "epoch": 0.36170250863967623, "grad_norm": 1.8379685878753662, "learning_rate": 0.00014763473314533166, "loss": 1.2926, "step": 10100 }, { "epoch": 0.36173832076924456, "grad_norm": 1.7743902206420898, "learning_rate": 0.0001476245343093121, "loss": 1.4345, "step": 10101 }, { "epoch": 0.36177413289881283, "grad_norm": 1.722127914428711, "learning_rate": 0.00014761433483256582, "loss": 1.528, "step": 10102 }, { "epoch": 0.3618099450283811, "grad_norm": 1.4296984672546387, "learning_rate": 0.00014760413471523012, "loss": 1.3692, "step": 10103 }, { "epoch": 0.3618457571579494, "grad_norm": 1.6753002405166626, "learning_rate": 0.00014759393395744215, "loss": 1.6153, "step": 10104 }, { "epoch": 0.3618815692875177, "grad_norm": 3.1370999813079834, "learning_rate": 0.00014758373255933924, "loss": 1.8066, "step": 10105 }, { "epoch": 0.36191738141708596, "grad_norm": 1.653550624847412, "learning_rate": 0.00014757353052105853, "loss": 1.2039, "step": 10106 }, { "epoch": 0.36195319354665423, "grad_norm": 2.0767953395843506, "learning_rate": 0.00014756332784273738, "loss": 1.678, "step": 10107 }, { "epoch": 0.36198900567622255, "grad_norm": 1.60899019241333, "learning_rate": 0.00014755312452451296, "loss": 1.8436, "step": 10108 }, { "epoch": 0.3620248178057908, "grad_norm": 1.545812726020813, "learning_rate": 0.0001475429205665226, "loss": 1.3691, "step": 10109 }, { "epoch": 0.3620606299353591, "grad_norm": 1.5063380002975464, "learning_rate": 0.0001475327159689036, "loss": 1.7184, "step": 10110 }, { "epoch": 0.3620964420649274, "grad_norm": 2.4832561016082764, "learning_rate": 0.0001475225107317932, "loss": 1.3295, "step": 10111 }, { "epoch": 0.3621322541944957, "grad_norm": 3.1396214962005615, "learning_rate": 0.00014751230485532873, "loss": 1.6655, "step": 10112 }, { "epoch": 0.36216806632406395, "grad_norm": 1.7851954698562622, "learning_rate": 0.00014750209833964747, "loss": 1.5574, "step": 10113 }, { "epoch": 0.3622038784536322, "grad_norm": 1.6681740283966064, "learning_rate": 0.00014749189118488677, "loss": 1.4129, "step": 10114 }, { "epoch": 0.36223969058320055, "grad_norm": 1.871593952178955, "learning_rate": 0.0001474816833911839, "loss": 1.536, "step": 10115 }, { "epoch": 0.3622755027127688, "grad_norm": 2.309457540512085, "learning_rate": 0.00014747147495867627, "loss": 1.3783, "step": 10116 }, { "epoch": 0.3623113148423371, "grad_norm": 2.2859158515930176, "learning_rate": 0.00014746126588750116, "loss": 1.6957, "step": 10117 }, { "epoch": 0.3623471269719054, "grad_norm": 1.851605772972107, "learning_rate": 0.00014745105617779594, "loss": 1.4928, "step": 10118 }, { "epoch": 0.3623829391014737, "grad_norm": 1.9329193830490112, "learning_rate": 0.00014744084582969793, "loss": 1.7967, "step": 10119 }, { "epoch": 0.36241875123104195, "grad_norm": 1.6493408679962158, "learning_rate": 0.00014743063484334455, "loss": 1.6187, "step": 10120 }, { "epoch": 0.3624545633606102, "grad_norm": 1.6800282001495361, "learning_rate": 0.00014742042321887322, "loss": 1.6358, "step": 10121 }, { "epoch": 0.36249037549017854, "grad_norm": 1.4881089925765991, "learning_rate": 0.00014741021095642117, "loss": 1.6944, "step": 10122 }, { "epoch": 0.3625261876197468, "grad_norm": 1.2354776859283447, "learning_rate": 0.00014739999805612596, "loss": 1.151, "step": 10123 }, { "epoch": 0.3625619997493151, "grad_norm": 1.8695483207702637, "learning_rate": 0.00014738978451812488, "loss": 1.4648, "step": 10124 }, { "epoch": 0.3625978118788834, "grad_norm": 1.4767268896102905, "learning_rate": 0.00014737957034255538, "loss": 1.5988, "step": 10125 }, { "epoch": 0.36263362400845167, "grad_norm": 1.4204144477844238, "learning_rate": 0.00014736935552955488, "loss": 1.4762, "step": 10126 }, { "epoch": 0.36266943613801994, "grad_norm": 1.6141799688339233, "learning_rate": 0.00014735914007926084, "loss": 1.4833, "step": 10127 }, { "epoch": 0.3627052482675882, "grad_norm": 2.05619740486145, "learning_rate": 0.0001473489239918106, "loss": 1.391, "step": 10128 }, { "epoch": 0.36274106039715653, "grad_norm": 1.3890151977539062, "learning_rate": 0.0001473387072673417, "loss": 1.5043, "step": 10129 }, { "epoch": 0.3627768725267248, "grad_norm": 1.4893358945846558, "learning_rate": 0.00014732848990599154, "loss": 1.4655, "step": 10130 }, { "epoch": 0.36281268465629307, "grad_norm": 1.4449459314346313, "learning_rate": 0.0001473182719078976, "loss": 1.5565, "step": 10131 }, { "epoch": 0.3628484967858614, "grad_norm": 1.4098438024520874, "learning_rate": 0.00014730805327319737, "loss": 1.5439, "step": 10132 }, { "epoch": 0.36288430891542967, "grad_norm": 1.4516136646270752, "learning_rate": 0.00014729783400202828, "loss": 1.5875, "step": 10133 }, { "epoch": 0.36292012104499793, "grad_norm": 2.057598114013672, "learning_rate": 0.00014728761409452785, "loss": 1.5132, "step": 10134 }, { "epoch": 0.3629559331745662, "grad_norm": 2.4684760570526123, "learning_rate": 0.00014727739355083357, "loss": 1.6289, "step": 10135 }, { "epoch": 0.3629917453041345, "grad_norm": 2.0605359077453613, "learning_rate": 0.00014726717237108293, "loss": 1.6129, "step": 10136 }, { "epoch": 0.3630275574337028, "grad_norm": 1.469491720199585, "learning_rate": 0.00014725695055541348, "loss": 1.4592, "step": 10137 }, { "epoch": 0.36306336956327107, "grad_norm": 1.547979474067688, "learning_rate": 0.00014724672810396272, "loss": 1.441, "step": 10138 }, { "epoch": 0.3630991816928394, "grad_norm": 1.272243618965149, "learning_rate": 0.00014723650501686817, "loss": 1.4066, "step": 10139 }, { "epoch": 0.36313499382240766, "grad_norm": 1.7786619663238525, "learning_rate": 0.00014722628129426734, "loss": 1.4726, "step": 10140 }, { "epoch": 0.36317080595197593, "grad_norm": 1.6617714166641235, "learning_rate": 0.0001472160569362979, "loss": 1.6447, "step": 10141 }, { "epoch": 0.3632066180815442, "grad_norm": 2.5883841514587402, "learning_rate": 0.0001472058319430972, "loss": 1.4975, "step": 10142 }, { "epoch": 0.3632424302111125, "grad_norm": 1.781213402748108, "learning_rate": 0.000147195606314803, "loss": 1.5525, "step": 10143 }, { "epoch": 0.3632782423406808, "grad_norm": 4.267208099365234, "learning_rate": 0.0001471853800515528, "loss": 1.6374, "step": 10144 }, { "epoch": 0.36331405447024906, "grad_norm": 1.460854172706604, "learning_rate": 0.00014717515315348413, "loss": 1.7116, "step": 10145 }, { "epoch": 0.3633498665998174, "grad_norm": 1.7079209089279175, "learning_rate": 0.00014716492562073466, "loss": 1.4572, "step": 10146 }, { "epoch": 0.36338567872938565, "grad_norm": 1.5507252216339111, "learning_rate": 0.00014715469745344196, "loss": 1.3672, "step": 10147 }, { "epoch": 0.3634214908589539, "grad_norm": 2.1776158809661865, "learning_rate": 0.00014714446865174362, "loss": 1.7472, "step": 10148 }, { "epoch": 0.3634573029885222, "grad_norm": 2.235363483428955, "learning_rate": 0.00014713423921577725, "loss": 1.4751, "step": 10149 }, { "epoch": 0.3634931151180905, "grad_norm": 2.331434726715088, "learning_rate": 0.0001471240091456805, "loss": 1.5584, "step": 10150 }, { "epoch": 0.3635289272476588, "grad_norm": 1.8828892707824707, "learning_rate": 0.00014711377844159099, "loss": 1.4935, "step": 10151 }, { "epoch": 0.36356473937722705, "grad_norm": 1.5459474325180054, "learning_rate": 0.00014710354710364637, "loss": 1.1762, "step": 10152 }, { "epoch": 0.3636005515067954, "grad_norm": 1.9720122814178467, "learning_rate": 0.00014709331513198425, "loss": 1.751, "step": 10153 }, { "epoch": 0.36363636363636365, "grad_norm": 1.7527331113815308, "learning_rate": 0.00014708308252674236, "loss": 1.3377, "step": 10154 }, { "epoch": 0.3636721757659319, "grad_norm": 1.449865698814392, "learning_rate": 0.0001470728492880583, "loss": 1.5698, "step": 10155 }, { "epoch": 0.3637079878955002, "grad_norm": 1.602096438407898, "learning_rate": 0.00014706261541606983, "loss": 1.5431, "step": 10156 }, { "epoch": 0.3637438000250685, "grad_norm": 1.5750555992126465, "learning_rate": 0.00014705238091091455, "loss": 1.5822, "step": 10157 }, { "epoch": 0.3637796121546368, "grad_norm": 1.5211970806121826, "learning_rate": 0.00014704214577273016, "loss": 1.4798, "step": 10158 }, { "epoch": 0.36381542428420505, "grad_norm": 1.606300950050354, "learning_rate": 0.00014703191000165438, "loss": 1.6259, "step": 10159 }, { "epoch": 0.36385123641377337, "grad_norm": 2.0974745750427246, "learning_rate": 0.00014702167359782493, "loss": 1.5262, "step": 10160 }, { "epoch": 0.36388704854334164, "grad_norm": 1.8589930534362793, "learning_rate": 0.0001470114365613795, "loss": 1.2566, "step": 10161 }, { "epoch": 0.3639228606729099, "grad_norm": 1.8257662057876587, "learning_rate": 0.00014700119889245582, "loss": 1.5583, "step": 10162 }, { "epoch": 0.3639586728024782, "grad_norm": 1.329789638519287, "learning_rate": 0.00014699096059119166, "loss": 1.4588, "step": 10163 }, { "epoch": 0.3639944849320465, "grad_norm": 1.6192381381988525, "learning_rate": 0.0001469807216577247, "loss": 1.6746, "step": 10164 }, { "epoch": 0.36403029706161477, "grad_norm": 3.251436948776245, "learning_rate": 0.0001469704820921928, "loss": 1.4111, "step": 10165 }, { "epoch": 0.36406610919118304, "grad_norm": 1.9543156623840332, "learning_rate": 0.00014696024189473362, "loss": 1.4814, "step": 10166 }, { "epoch": 0.36410192132075136, "grad_norm": 1.507798671722412, "learning_rate": 0.00014695000106548496, "loss": 1.4728, "step": 10167 }, { "epoch": 0.36413773345031963, "grad_norm": 2.0722126960754395, "learning_rate": 0.0001469397596045846, "loss": 1.5825, "step": 10168 }, { "epoch": 0.3641735455798879, "grad_norm": 2.018157720565796, "learning_rate": 0.0001469295175121703, "loss": 1.6858, "step": 10169 }, { "epoch": 0.36420935770945617, "grad_norm": 1.5147225856781006, "learning_rate": 0.00014691927478837987, "loss": 1.5474, "step": 10170 }, { "epoch": 0.3642451698390245, "grad_norm": 1.813011646270752, "learning_rate": 0.00014690903143335117, "loss": 1.454, "step": 10171 }, { "epoch": 0.36428098196859277, "grad_norm": 1.3621699810028076, "learning_rate": 0.00014689878744722192, "loss": 1.4512, "step": 10172 }, { "epoch": 0.36431679409816103, "grad_norm": 1.4398069381713867, "learning_rate": 0.00014688854283013001, "loss": 1.4105, "step": 10173 }, { "epoch": 0.36435260622772936, "grad_norm": 1.4563603401184082, "learning_rate": 0.0001468782975822132, "loss": 1.3174, "step": 10174 }, { "epoch": 0.3643884183572976, "grad_norm": 1.7026748657226562, "learning_rate": 0.0001468680517036094, "loss": 1.642, "step": 10175 }, { "epoch": 0.3644242304868659, "grad_norm": 1.738409399986267, "learning_rate": 0.0001468578051944564, "loss": 1.4977, "step": 10176 }, { "epoch": 0.36446004261643417, "grad_norm": 2.2785589694976807, "learning_rate": 0.00014684755805489206, "loss": 1.5793, "step": 10177 }, { "epoch": 0.3644958547460025, "grad_norm": 1.4905781745910645, "learning_rate": 0.0001468373102850543, "loss": 1.4412, "step": 10178 }, { "epoch": 0.36453166687557076, "grad_norm": 1.4216437339782715, "learning_rate": 0.0001468270618850809, "loss": 1.7233, "step": 10179 }, { "epoch": 0.36456747900513903, "grad_norm": 2.1683859825134277, "learning_rate": 0.0001468168128551098, "loss": 1.7547, "step": 10180 }, { "epoch": 0.36460329113470735, "grad_norm": 2.0887510776519775, "learning_rate": 0.00014680656319527886, "loss": 1.317, "step": 10181 }, { "epoch": 0.3646391032642756, "grad_norm": 1.405361533164978, "learning_rate": 0.00014679631290572602, "loss": 1.423, "step": 10182 }, { "epoch": 0.3646749153938439, "grad_norm": 1.915013313293457, "learning_rate": 0.00014678606198658916, "loss": 1.3918, "step": 10183 }, { "epoch": 0.36471072752341216, "grad_norm": 1.6929726600646973, "learning_rate": 0.00014677581043800615, "loss": 1.5082, "step": 10184 }, { "epoch": 0.3647465396529805, "grad_norm": 1.6973090171813965, "learning_rate": 0.00014676555826011496, "loss": 1.2838, "step": 10185 }, { "epoch": 0.36478235178254875, "grad_norm": 1.729513168334961, "learning_rate": 0.0001467553054530535, "loss": 1.9042, "step": 10186 }, { "epoch": 0.364818163912117, "grad_norm": 1.4617276191711426, "learning_rate": 0.00014674505201695971, "loss": 1.4484, "step": 10187 }, { "epoch": 0.36485397604168535, "grad_norm": 1.501288890838623, "learning_rate": 0.00014673479795197154, "loss": 1.1133, "step": 10188 }, { "epoch": 0.3648897881712536, "grad_norm": 1.6365547180175781, "learning_rate": 0.00014672454325822696, "loss": 1.5686, "step": 10189 }, { "epoch": 0.3649256003008219, "grad_norm": 1.5337694883346558, "learning_rate": 0.00014671428793586392, "loss": 1.3405, "step": 10190 }, { "epoch": 0.36496141243039015, "grad_norm": 3.3464503288269043, "learning_rate": 0.0001467040319850204, "loss": 1.5726, "step": 10191 }, { "epoch": 0.3649972245599585, "grad_norm": 1.6443172693252563, "learning_rate": 0.0001466937754058344, "loss": 1.6067, "step": 10192 }, { "epoch": 0.36503303668952675, "grad_norm": 1.2222121953964233, "learning_rate": 0.00014668351819844384, "loss": 1.3071, "step": 10193 }, { "epoch": 0.365068848819095, "grad_norm": 1.9840507507324219, "learning_rate": 0.00014667326036298675, "loss": 1.7483, "step": 10194 }, { "epoch": 0.36510466094866334, "grad_norm": 1.4311078786849976, "learning_rate": 0.00014666300189960116, "loss": 1.1324, "step": 10195 }, { "epoch": 0.3651404730782316, "grad_norm": 1.5631734132766724, "learning_rate": 0.00014665274280842508, "loss": 1.4826, "step": 10196 }, { "epoch": 0.3651762852077999, "grad_norm": 1.5126618146896362, "learning_rate": 0.0001466424830895965, "loss": 1.4611, "step": 10197 }, { "epoch": 0.36521209733736815, "grad_norm": 1.7029465436935425, "learning_rate": 0.00014663222274325353, "loss": 1.2322, "step": 10198 }, { "epoch": 0.36524790946693647, "grad_norm": 1.7353816032409668, "learning_rate": 0.0001466219617695341, "loss": 1.2907, "step": 10199 }, { "epoch": 0.36528372159650474, "grad_norm": 1.881523847579956, "learning_rate": 0.00014661170016857633, "loss": 1.6683, "step": 10200 }, { "epoch": 0.365319533726073, "grad_norm": 1.5477781295776367, "learning_rate": 0.00014660143794051827, "loss": 1.7577, "step": 10201 }, { "epoch": 0.3653553458556413, "grad_norm": 2.418309211730957, "learning_rate": 0.00014659117508549797, "loss": 1.4513, "step": 10202 }, { "epoch": 0.3653911579852096, "grad_norm": 1.7008320093154907, "learning_rate": 0.0001465809116036535, "loss": 1.6526, "step": 10203 }, { "epoch": 0.36542697011477787, "grad_norm": 1.8753713369369507, "learning_rate": 0.00014657064749512295, "loss": 1.8226, "step": 10204 }, { "epoch": 0.36546278224434614, "grad_norm": 1.8317325115203857, "learning_rate": 0.0001465603827600444, "loss": 1.5395, "step": 10205 }, { "epoch": 0.36549859437391446, "grad_norm": 1.8578588962554932, "learning_rate": 0.00014655011739855595, "loss": 1.5256, "step": 10206 }, { "epoch": 0.36553440650348273, "grad_norm": 1.3821361064910889, "learning_rate": 0.00014653985141079576, "loss": 1.4077, "step": 10207 }, { "epoch": 0.365570218633051, "grad_norm": 1.7334470748901367, "learning_rate": 0.00014652958479690185, "loss": 1.2891, "step": 10208 }, { "epoch": 0.36560603076261927, "grad_norm": 1.7649697065353394, "learning_rate": 0.00014651931755701246, "loss": 1.4587, "step": 10209 }, { "epoch": 0.3656418428921876, "grad_norm": 1.560272455215454, "learning_rate": 0.0001465090496912656, "loss": 1.2729, "step": 10210 }, { "epoch": 0.36567765502175587, "grad_norm": 2.465453624725342, "learning_rate": 0.0001464987811997995, "loss": 1.7935, "step": 10211 }, { "epoch": 0.36571346715132413, "grad_norm": 1.8573144674301147, "learning_rate": 0.00014648851208275224, "loss": 1.7883, "step": 10212 }, { "epoch": 0.36574927928089246, "grad_norm": 1.308641791343689, "learning_rate": 0.00014647824234026205, "loss": 1.2575, "step": 10213 }, { "epoch": 0.3657850914104607, "grad_norm": 2.2015023231506348, "learning_rate": 0.00014646797197246706, "loss": 1.302, "step": 10214 }, { "epoch": 0.365820903540029, "grad_norm": 1.6897261142730713, "learning_rate": 0.00014645770097950544, "loss": 1.527, "step": 10215 }, { "epoch": 0.36585671566959727, "grad_norm": 1.403554916381836, "learning_rate": 0.00014644742936151538, "loss": 1.525, "step": 10216 }, { "epoch": 0.3658925277991656, "grad_norm": 1.6537847518920898, "learning_rate": 0.00014643715711863507, "loss": 1.566, "step": 10217 }, { "epoch": 0.36592833992873386, "grad_norm": 1.7466622591018677, "learning_rate": 0.00014642688425100273, "loss": 1.8627, "step": 10218 }, { "epoch": 0.36596415205830213, "grad_norm": 1.805519700050354, "learning_rate": 0.00014641661075875652, "loss": 1.373, "step": 10219 }, { "epoch": 0.36599996418787045, "grad_norm": 1.6405415534973145, "learning_rate": 0.0001464063366420347, "loss": 1.498, "step": 10220 }, { "epoch": 0.3660357763174387, "grad_norm": 1.6992943286895752, "learning_rate": 0.00014639606190097547, "loss": 1.4758, "step": 10221 }, { "epoch": 0.366071588447007, "grad_norm": 1.6012146472930908, "learning_rate": 0.00014638578653571708, "loss": 1.5874, "step": 10222 }, { "epoch": 0.36610740057657526, "grad_norm": 1.8396260738372803, "learning_rate": 0.00014637551054639774, "loss": 1.5488, "step": 10223 }, { "epoch": 0.3661432127061436, "grad_norm": 2.8932039737701416, "learning_rate": 0.00014636523393315578, "loss": 1.3023, "step": 10224 }, { "epoch": 0.36617902483571185, "grad_norm": 1.3132829666137695, "learning_rate": 0.00014635495669612934, "loss": 1.5124, "step": 10225 }, { "epoch": 0.3662148369652801, "grad_norm": 1.9502285718917847, "learning_rate": 0.0001463446788354568, "loss": 1.6832, "step": 10226 }, { "epoch": 0.36625064909484845, "grad_norm": 1.5486946105957031, "learning_rate": 0.00014633440035127638, "loss": 1.5737, "step": 10227 }, { "epoch": 0.3662864612244167, "grad_norm": 1.6194562911987305, "learning_rate": 0.00014632412124372635, "loss": 1.3977, "step": 10228 }, { "epoch": 0.366322273353985, "grad_norm": 1.5558431148529053, "learning_rate": 0.00014631384151294507, "loss": 1.8146, "step": 10229 }, { "epoch": 0.36635808548355325, "grad_norm": 1.5941044092178345, "learning_rate": 0.00014630356115907073, "loss": 1.6239, "step": 10230 }, { "epoch": 0.3663938976131216, "grad_norm": 1.5189129114151, "learning_rate": 0.00014629328018224175, "loss": 1.4244, "step": 10231 }, { "epoch": 0.36642970974268985, "grad_norm": 1.5772870779037476, "learning_rate": 0.00014628299858259638, "loss": 1.4397, "step": 10232 }, { "epoch": 0.3664655218722581, "grad_norm": 1.4384115934371948, "learning_rate": 0.00014627271636027297, "loss": 1.5807, "step": 10233 }, { "epoch": 0.36650133400182644, "grad_norm": 1.3554954528808594, "learning_rate": 0.00014626243351540983, "loss": 1.106, "step": 10234 }, { "epoch": 0.3665371461313947, "grad_norm": 1.87827730178833, "learning_rate": 0.00014625215004814533, "loss": 1.7206, "step": 10235 }, { "epoch": 0.366572958260963, "grad_norm": 2.0337166786193848, "learning_rate": 0.00014624186595861785, "loss": 1.4494, "step": 10236 }, { "epoch": 0.36660877039053125, "grad_norm": 1.458093523979187, "learning_rate": 0.00014623158124696565, "loss": 1.6958, "step": 10237 }, { "epoch": 0.36664458252009957, "grad_norm": 1.5604604482650757, "learning_rate": 0.00014622129591332722, "loss": 1.6612, "step": 10238 }, { "epoch": 0.36668039464966784, "grad_norm": 1.7306214570999146, "learning_rate": 0.0001462110099578408, "loss": 1.6922, "step": 10239 }, { "epoch": 0.3667162067792361, "grad_norm": 1.6424694061279297, "learning_rate": 0.0001462007233806449, "loss": 1.4857, "step": 10240 }, { "epoch": 0.36675201890880443, "grad_norm": 1.6787950992584229, "learning_rate": 0.00014619043618187784, "loss": 1.0656, "step": 10241 }, { "epoch": 0.3667878310383727, "grad_norm": 2.0720412731170654, "learning_rate": 0.00014618014836167807, "loss": 1.6523, "step": 10242 }, { "epoch": 0.36682364316794097, "grad_norm": 1.870436191558838, "learning_rate": 0.00014616985992018394, "loss": 1.7099, "step": 10243 }, { "epoch": 0.36685945529750924, "grad_norm": 1.4863344430923462, "learning_rate": 0.00014615957085753394, "loss": 1.5977, "step": 10244 }, { "epoch": 0.36689526742707756, "grad_norm": 1.7124794721603394, "learning_rate": 0.00014614928117386643, "loss": 1.3756, "step": 10245 }, { "epoch": 0.36693107955664583, "grad_norm": 4.074779987335205, "learning_rate": 0.0001461389908693199, "loss": 1.6485, "step": 10246 }, { "epoch": 0.3669668916862141, "grad_norm": 1.4112684726715088, "learning_rate": 0.0001461286999440327, "loss": 1.0859, "step": 10247 }, { "epoch": 0.3670027038157824, "grad_norm": 1.4489010572433472, "learning_rate": 0.00014611840839814336, "loss": 1.3586, "step": 10248 }, { "epoch": 0.3670385159453507, "grad_norm": 1.4673880338668823, "learning_rate": 0.00014610811623179038, "loss": 1.3926, "step": 10249 }, { "epoch": 0.36707432807491897, "grad_norm": 1.5906109809875488, "learning_rate": 0.00014609782344511213, "loss": 1.2522, "step": 10250 }, { "epoch": 0.36711014020448723, "grad_norm": 1.204659342765808, "learning_rate": 0.0001460875300382471, "loss": 1.2822, "step": 10251 }, { "epoch": 0.36714595233405556, "grad_norm": 1.5988489389419556, "learning_rate": 0.00014607723601133384, "loss": 1.4988, "step": 10252 }, { "epoch": 0.3671817644636238, "grad_norm": 1.5991809368133545, "learning_rate": 0.00014606694136451082, "loss": 1.7249, "step": 10253 }, { "epoch": 0.3672175765931921, "grad_norm": 1.5101706981658936, "learning_rate": 0.0001460566460979165, "loss": 1.5289, "step": 10254 }, { "epoch": 0.3672533887227604, "grad_norm": 1.951116681098938, "learning_rate": 0.00014604635021168942, "loss": 1.5754, "step": 10255 }, { "epoch": 0.3672892008523287, "grad_norm": 2.0610105991363525, "learning_rate": 0.00014603605370596808, "loss": 1.1101, "step": 10256 }, { "epoch": 0.36732501298189696, "grad_norm": 1.4582083225250244, "learning_rate": 0.00014602575658089105, "loss": 1.5282, "step": 10257 }, { "epoch": 0.36736082511146523, "grad_norm": 1.7049942016601562, "learning_rate": 0.0001460154588365968, "loss": 1.4164, "step": 10258 }, { "epoch": 0.36739663724103355, "grad_norm": 1.7061821222305298, "learning_rate": 0.00014600516047322392, "loss": 1.6465, "step": 10259 }, { "epoch": 0.3674324493706018, "grad_norm": 2.0501723289489746, "learning_rate": 0.00014599486149091096, "loss": 1.4027, "step": 10260 }, { "epoch": 0.3674682615001701, "grad_norm": 1.5676246881484985, "learning_rate": 0.00014598456188979643, "loss": 1.7521, "step": 10261 }, { "epoch": 0.3675040736297384, "grad_norm": 1.6521248817443848, "learning_rate": 0.000145974261670019, "loss": 1.4259, "step": 10262 }, { "epoch": 0.3675398857593067, "grad_norm": 1.571914792060852, "learning_rate": 0.00014596396083171715, "loss": 1.7523, "step": 10263 }, { "epoch": 0.36757569788887495, "grad_norm": 1.5955133438110352, "learning_rate": 0.0001459536593750295, "loss": 1.3922, "step": 10264 }, { "epoch": 0.3676115100184432, "grad_norm": 1.9849090576171875, "learning_rate": 0.00014594335730009462, "loss": 1.5897, "step": 10265 }, { "epoch": 0.36764732214801155, "grad_norm": 1.507638692855835, "learning_rate": 0.00014593305460705114, "loss": 1.5146, "step": 10266 }, { "epoch": 0.3676831342775798, "grad_norm": 1.4899927377700806, "learning_rate": 0.00014592275129603766, "loss": 1.5611, "step": 10267 }, { "epoch": 0.3677189464071481, "grad_norm": 1.6360082626342773, "learning_rate": 0.00014591244736719282, "loss": 1.5844, "step": 10268 }, { "epoch": 0.3677547585367164, "grad_norm": 1.3872517347335815, "learning_rate": 0.00014590214282065518, "loss": 1.5452, "step": 10269 }, { "epoch": 0.3677905706662847, "grad_norm": 1.434220790863037, "learning_rate": 0.00014589183765656343, "loss": 1.3571, "step": 10270 }, { "epoch": 0.36782638279585295, "grad_norm": 1.7095938920974731, "learning_rate": 0.00014588153187505625, "loss": 1.4374, "step": 10271 }, { "epoch": 0.3678621949254212, "grad_norm": 1.4860492944717407, "learning_rate": 0.00014587122547627217, "loss": 1.5456, "step": 10272 }, { "epoch": 0.36789800705498954, "grad_norm": 1.56809663772583, "learning_rate": 0.00014586091846034997, "loss": 1.343, "step": 10273 }, { "epoch": 0.3679338191845578, "grad_norm": 1.3190221786499023, "learning_rate": 0.00014585061082742824, "loss": 1.5654, "step": 10274 }, { "epoch": 0.3679696313141261, "grad_norm": 1.5554225444793701, "learning_rate": 0.0001458403025776457, "loss": 1.6568, "step": 10275 }, { "epoch": 0.3680054434436944, "grad_norm": 1.8499597311019897, "learning_rate": 0.000145829993711141, "loss": 1.606, "step": 10276 }, { "epoch": 0.36804125557326267, "grad_norm": 1.9961286783218384, "learning_rate": 0.00014581968422805287, "loss": 1.6618, "step": 10277 }, { "epoch": 0.36807706770283094, "grad_norm": 1.6830213069915771, "learning_rate": 0.00014580937412852, "loss": 1.4959, "step": 10278 }, { "epoch": 0.3681128798323992, "grad_norm": 1.537229299545288, "learning_rate": 0.0001457990634126811, "loss": 1.5156, "step": 10279 }, { "epoch": 0.36814869196196753, "grad_norm": 1.7691799402236938, "learning_rate": 0.00014578875208067483, "loss": 1.2807, "step": 10280 }, { "epoch": 0.3681845040915358, "grad_norm": 1.5051552057266235, "learning_rate": 0.00014577844013264, "loss": 1.5256, "step": 10281 }, { "epoch": 0.36822031622110407, "grad_norm": 1.39759361743927, "learning_rate": 0.0001457681275687153, "loss": 1.6506, "step": 10282 }, { "epoch": 0.3682561283506724, "grad_norm": 1.8484997749328613, "learning_rate": 0.00014575781438903946, "loss": 1.5992, "step": 10283 }, { "epoch": 0.36829194048024066, "grad_norm": 1.4711723327636719, "learning_rate": 0.0001457475005937513, "loss": 1.5188, "step": 10284 }, { "epoch": 0.36832775260980893, "grad_norm": 2.3037071228027344, "learning_rate": 0.0001457371861829895, "loss": 1.5384, "step": 10285 }, { "epoch": 0.3683635647393772, "grad_norm": 1.4197289943695068, "learning_rate": 0.00014572687115689282, "loss": 1.4293, "step": 10286 }, { "epoch": 0.3683993768689455, "grad_norm": 1.5581529140472412, "learning_rate": 0.0001457165555156001, "loss": 1.2188, "step": 10287 }, { "epoch": 0.3684351889985138, "grad_norm": 1.9231946468353271, "learning_rate": 0.00014570623925925014, "loss": 1.1728, "step": 10288 }, { "epoch": 0.36847100112808207, "grad_norm": 1.9287388324737549, "learning_rate": 0.00014569592238798163, "loss": 1.4546, "step": 10289 }, { "epoch": 0.3685068132576504, "grad_norm": 2.1329243183135986, "learning_rate": 0.00014568560490193345, "loss": 1.4835, "step": 10290 }, { "epoch": 0.36854262538721866, "grad_norm": 1.5576348304748535, "learning_rate": 0.0001456752868012444, "loss": 1.4383, "step": 10291 }, { "epoch": 0.3685784375167869, "grad_norm": 1.8817024230957031, "learning_rate": 0.00014566496808605326, "loss": 1.4858, "step": 10292 }, { "epoch": 0.3686142496463552, "grad_norm": 1.5834522247314453, "learning_rate": 0.00014565464875649888, "loss": 1.5599, "step": 10293 }, { "epoch": 0.3686500617759235, "grad_norm": 1.5673562288284302, "learning_rate": 0.0001456443288127201, "loss": 1.4004, "step": 10294 }, { "epoch": 0.3686858739054918, "grad_norm": 1.6744060516357422, "learning_rate": 0.00014563400825485576, "loss": 1.5878, "step": 10295 }, { "epoch": 0.36872168603506006, "grad_norm": 1.609439492225647, "learning_rate": 0.00014562368708304467, "loss": 1.4532, "step": 10296 }, { "epoch": 0.3687574981646284, "grad_norm": 2.1061041355133057, "learning_rate": 0.00014561336529742575, "loss": 1.5246, "step": 10297 }, { "epoch": 0.36879331029419665, "grad_norm": 1.7930060625076294, "learning_rate": 0.00014560304289813785, "loss": 1.3858, "step": 10298 }, { "epoch": 0.3688291224237649, "grad_norm": 1.5626553297042847, "learning_rate": 0.0001455927198853198, "loss": 1.5151, "step": 10299 }, { "epoch": 0.3688649345533332, "grad_norm": 2.6362035274505615, "learning_rate": 0.00014558239625911052, "loss": 1.9309, "step": 10300 }, { "epoch": 0.3689007466829015, "grad_norm": 1.53107488155365, "learning_rate": 0.00014557207201964893, "loss": 1.4419, "step": 10301 }, { "epoch": 0.3689365588124698, "grad_norm": 2.428128957748413, "learning_rate": 0.00014556174716707384, "loss": 1.6143, "step": 10302 }, { "epoch": 0.36897237094203805, "grad_norm": 1.9132128953933716, "learning_rate": 0.00014555142170152423, "loss": 1.6772, "step": 10303 }, { "epoch": 0.3690081830716064, "grad_norm": 1.4515597820281982, "learning_rate": 0.00014554109562313903, "loss": 1.3292, "step": 10304 }, { "epoch": 0.36904399520117465, "grad_norm": 2.2293195724487305, "learning_rate": 0.00014553076893205708, "loss": 1.4524, "step": 10305 }, { "epoch": 0.3690798073307429, "grad_norm": 1.7687851190567017, "learning_rate": 0.00014552044162841743, "loss": 1.2241, "step": 10306 }, { "epoch": 0.3691156194603112, "grad_norm": 1.509236454963684, "learning_rate": 0.0001455101137123589, "loss": 1.6139, "step": 10307 }, { "epoch": 0.3691514315898795, "grad_norm": 1.5203906297683716, "learning_rate": 0.00014549978518402053, "loss": 1.646, "step": 10308 }, { "epoch": 0.3691872437194478, "grad_norm": 2.03283429145813, "learning_rate": 0.0001454894560435412, "loss": 1.3515, "step": 10309 }, { "epoch": 0.36922305584901605, "grad_norm": 1.2739007472991943, "learning_rate": 0.00014547912629105995, "loss": 1.3494, "step": 10310 }, { "epoch": 0.36925886797858437, "grad_norm": 1.7174979448318481, "learning_rate": 0.00014546879592671573, "loss": 1.5571, "step": 10311 }, { "epoch": 0.36929468010815264, "grad_norm": 1.639888048171997, "learning_rate": 0.00014545846495064748, "loss": 1.6965, "step": 10312 }, { "epoch": 0.3693304922377209, "grad_norm": 1.811215877532959, "learning_rate": 0.0001454481333629942, "loss": 1.5453, "step": 10313 }, { "epoch": 0.3693663043672892, "grad_norm": 2.070788621902466, "learning_rate": 0.00014543780116389496, "loss": 1.5838, "step": 10314 }, { "epoch": 0.3694021164968575, "grad_norm": 1.934891700744629, "learning_rate": 0.0001454274683534887, "loss": 1.2206, "step": 10315 }, { "epoch": 0.36943792862642577, "grad_norm": 2.129443645477295, "learning_rate": 0.00014541713493191444, "loss": 1.5475, "step": 10316 }, { "epoch": 0.36947374075599404, "grad_norm": 1.6835366487503052, "learning_rate": 0.00014540680089931125, "loss": 1.4861, "step": 10317 }, { "epoch": 0.36950955288556236, "grad_norm": 1.4131048917770386, "learning_rate": 0.00014539646625581805, "loss": 1.1092, "step": 10318 }, { "epoch": 0.36954536501513063, "grad_norm": 2.283271551132202, "learning_rate": 0.00014538613100157404, "loss": 1.4867, "step": 10319 }, { "epoch": 0.3695811771446989, "grad_norm": 1.9396806955337524, "learning_rate": 0.0001453757951367181, "loss": 1.6176, "step": 10320 }, { "epoch": 0.36961698927426717, "grad_norm": 1.417442798614502, "learning_rate": 0.00014536545866138941, "loss": 1.3439, "step": 10321 }, { "epoch": 0.3696528014038355, "grad_norm": 1.7332024574279785, "learning_rate": 0.000145355121575727, "loss": 1.4305, "step": 10322 }, { "epoch": 0.36968861353340376, "grad_norm": 1.3164316415786743, "learning_rate": 0.00014534478387986992, "loss": 1.3904, "step": 10323 }, { "epoch": 0.36972442566297203, "grad_norm": 2.070188045501709, "learning_rate": 0.0001453344455739573, "loss": 1.4226, "step": 10324 }, { "epoch": 0.36976023779254036, "grad_norm": 1.8793950080871582, "learning_rate": 0.0001453241066581281, "loss": 1.3511, "step": 10325 }, { "epoch": 0.3697960499221086, "grad_norm": 1.7022724151611328, "learning_rate": 0.0001453137671325216, "loss": 1.4012, "step": 10326 }, { "epoch": 0.3698318620516769, "grad_norm": 1.8162293434143066, "learning_rate": 0.00014530342699727676, "loss": 1.2831, "step": 10327 }, { "epoch": 0.36986767418124517, "grad_norm": 1.9489846229553223, "learning_rate": 0.0001452930862525328, "loss": 1.5896, "step": 10328 }, { "epoch": 0.3699034863108135, "grad_norm": 1.9331752061843872, "learning_rate": 0.00014528274489842872, "loss": 1.3076, "step": 10329 }, { "epoch": 0.36993929844038176, "grad_norm": 1.6232823133468628, "learning_rate": 0.00014527240293510377, "loss": 1.3819, "step": 10330 }, { "epoch": 0.36997511056995, "grad_norm": 2.704481840133667, "learning_rate": 0.000145262060362697, "loss": 1.7733, "step": 10331 }, { "epoch": 0.37001092269951835, "grad_norm": 1.7846752405166626, "learning_rate": 0.00014525171718134762, "loss": 1.5951, "step": 10332 }, { "epoch": 0.3700467348290866, "grad_norm": 1.3831145763397217, "learning_rate": 0.00014524137339119478, "loss": 1.5853, "step": 10333 }, { "epoch": 0.3700825469586549, "grad_norm": 2.4267773628234863, "learning_rate": 0.00014523102899237754, "loss": 1.3878, "step": 10334 }, { "epoch": 0.37011835908822316, "grad_norm": 1.7978699207305908, "learning_rate": 0.00014522068398503522, "loss": 1.6433, "step": 10335 }, { "epoch": 0.3701541712177915, "grad_norm": 1.7147051095962524, "learning_rate": 0.00014521033836930689, "loss": 1.0625, "step": 10336 }, { "epoch": 0.37018998334735975, "grad_norm": 2.743452310562134, "learning_rate": 0.0001451999921453318, "loss": 1.5351, "step": 10337 }, { "epoch": 0.370225795476928, "grad_norm": 1.5962437391281128, "learning_rate": 0.00014518964531324907, "loss": 1.4559, "step": 10338 }, { "epoch": 0.37026160760649635, "grad_norm": 1.8143364191055298, "learning_rate": 0.000145179297873198, "loss": 1.9998, "step": 10339 }, { "epoch": 0.3702974197360646, "grad_norm": 1.644858956336975, "learning_rate": 0.00014516894982531775, "loss": 1.4414, "step": 10340 }, { "epoch": 0.3703332318656329, "grad_norm": 1.4432719945907593, "learning_rate": 0.00014515860116974752, "loss": 1.5982, "step": 10341 }, { "epoch": 0.37036904399520115, "grad_norm": 1.5924537181854248, "learning_rate": 0.0001451482519066266, "loss": 1.3127, "step": 10342 }, { "epoch": 0.3704048561247695, "grad_norm": 1.6284945011138916, "learning_rate": 0.00014513790203609416, "loss": 1.2715, "step": 10343 }, { "epoch": 0.37044066825433775, "grad_norm": 1.8258150815963745, "learning_rate": 0.0001451275515582895, "loss": 1.7172, "step": 10344 }, { "epoch": 0.370476480383906, "grad_norm": 1.3459279537200928, "learning_rate": 0.0001451172004733518, "loss": 1.6076, "step": 10345 }, { "epoch": 0.37051229251347434, "grad_norm": 1.891798496246338, "learning_rate": 0.00014510684878142038, "loss": 1.7049, "step": 10346 }, { "epoch": 0.3705481046430426, "grad_norm": 1.2938027381896973, "learning_rate": 0.00014509649648263449, "loss": 1.6065, "step": 10347 }, { "epoch": 0.3705839167726109, "grad_norm": 1.8871210813522339, "learning_rate": 0.00014508614357713342, "loss": 1.4965, "step": 10348 }, { "epoch": 0.37061972890217915, "grad_norm": 1.41502046585083, "learning_rate": 0.00014507579006505642, "loss": 1.4462, "step": 10349 }, { "epoch": 0.37065554103174747, "grad_norm": 1.7434496879577637, "learning_rate": 0.00014506543594654288, "loss": 1.2558, "step": 10350 }, { "epoch": 0.37069135316131574, "grad_norm": 1.9141662120819092, "learning_rate": 0.00014505508122173198, "loss": 1.607, "step": 10351 }, { "epoch": 0.370727165290884, "grad_norm": 1.4460514783859253, "learning_rate": 0.00014504472589076307, "loss": 1.8445, "step": 10352 }, { "epoch": 0.37076297742045233, "grad_norm": 1.3187059164047241, "learning_rate": 0.00014503436995377548, "loss": 1.6002, "step": 10353 }, { "epoch": 0.3707987895500206, "grad_norm": 1.4016839265823364, "learning_rate": 0.00014502401341090853, "loss": 1.3959, "step": 10354 }, { "epoch": 0.37083460167958887, "grad_norm": 1.599345088005066, "learning_rate": 0.00014501365626230157, "loss": 1.9364, "step": 10355 }, { "epoch": 0.37087041380915714, "grad_norm": 2.149724006652832, "learning_rate": 0.00014500329850809394, "loss": 1.5702, "step": 10356 }, { "epoch": 0.37090622593872546, "grad_norm": 1.2839821577072144, "learning_rate": 0.00014499294014842494, "loss": 1.1688, "step": 10357 }, { "epoch": 0.37094203806829373, "grad_norm": 1.5366618633270264, "learning_rate": 0.000144982581183434, "loss": 1.2508, "step": 10358 }, { "epoch": 0.370977850197862, "grad_norm": 1.925222635269165, "learning_rate": 0.00014497222161326045, "loss": 1.4266, "step": 10359 }, { "epoch": 0.3710136623274303, "grad_norm": 1.6245794296264648, "learning_rate": 0.00014496186143804366, "loss": 1.5413, "step": 10360 }, { "epoch": 0.3710494744569986, "grad_norm": 1.6511958837509155, "learning_rate": 0.000144951500657923, "loss": 1.4162, "step": 10361 }, { "epoch": 0.37108528658656686, "grad_norm": 1.8239529132843018, "learning_rate": 0.00014494113927303792, "loss": 1.4576, "step": 10362 }, { "epoch": 0.37112109871613513, "grad_norm": 2.1355695724487305, "learning_rate": 0.00014493077728352778, "loss": 1.6973, "step": 10363 }, { "epoch": 0.37115691084570346, "grad_norm": 1.5001815557479858, "learning_rate": 0.00014492041468953194, "loss": 1.207, "step": 10364 }, { "epoch": 0.3711927229752717, "grad_norm": 1.4016623497009277, "learning_rate": 0.0001449100514911899, "loss": 1.3825, "step": 10365 }, { "epoch": 0.37122853510484, "grad_norm": 2.0167899131774902, "learning_rate": 0.00014489968768864107, "loss": 1.6569, "step": 10366 }, { "epoch": 0.3712643472344083, "grad_norm": 1.745682716369629, "learning_rate": 0.00014488932328202484, "loss": 1.574, "step": 10367 }, { "epoch": 0.3713001593639766, "grad_norm": 1.8677984476089478, "learning_rate": 0.00014487895827148067, "loss": 1.8146, "step": 10368 }, { "epoch": 0.37133597149354486, "grad_norm": 2.221468687057495, "learning_rate": 0.00014486859265714798, "loss": 1.5426, "step": 10369 }, { "epoch": 0.3713717836231131, "grad_norm": 1.536044716835022, "learning_rate": 0.00014485822643916626, "loss": 1.2835, "step": 10370 }, { "epoch": 0.37140759575268145, "grad_norm": 1.6062583923339844, "learning_rate": 0.00014484785961767498, "loss": 1.5294, "step": 10371 }, { "epoch": 0.3714434078822497, "grad_norm": 1.858888030052185, "learning_rate": 0.0001448374921928136, "loss": 1.4026, "step": 10372 }, { "epoch": 0.371479220011818, "grad_norm": 1.7808476686477661, "learning_rate": 0.00014482712416472157, "loss": 1.829, "step": 10373 }, { "epoch": 0.3715150321413863, "grad_norm": 1.4914846420288086, "learning_rate": 0.00014481675553353843, "loss": 1.2286, "step": 10374 }, { "epoch": 0.3715508442709546, "grad_norm": 1.6184641122817993, "learning_rate": 0.00014480638629940366, "loss": 1.7472, "step": 10375 }, { "epoch": 0.37158665640052285, "grad_norm": 1.7213737964630127, "learning_rate": 0.00014479601646245676, "loss": 1.4159, "step": 10376 }, { "epoch": 0.3716224685300911, "grad_norm": 1.4510760307312012, "learning_rate": 0.00014478564602283725, "loss": 1.4266, "step": 10377 }, { "epoch": 0.37165828065965945, "grad_norm": 1.8478851318359375, "learning_rate": 0.0001447752749806846, "loss": 1.5385, "step": 10378 }, { "epoch": 0.3716940927892277, "grad_norm": 1.6792529821395874, "learning_rate": 0.00014476490333613842, "loss": 1.5736, "step": 10379 }, { "epoch": 0.371729904918796, "grad_norm": 1.8026201725006104, "learning_rate": 0.00014475453108933817, "loss": 1.7353, "step": 10380 }, { "epoch": 0.3717657170483643, "grad_norm": 1.645272135734558, "learning_rate": 0.00014474415824042346, "loss": 1.2897, "step": 10381 }, { "epoch": 0.3718015291779326, "grad_norm": 1.5104624032974243, "learning_rate": 0.0001447337847895338, "loss": 1.7482, "step": 10382 }, { "epoch": 0.37183734130750085, "grad_norm": 2.0950675010681152, "learning_rate": 0.00014472341073680883, "loss": 2.0707, "step": 10383 }, { "epoch": 0.3718731534370691, "grad_norm": 1.8873536586761475, "learning_rate": 0.00014471303608238798, "loss": 1.4105, "step": 10384 }, { "epoch": 0.37190896556663744, "grad_norm": 2.1229586601257324, "learning_rate": 0.00014470266082641095, "loss": 1.9201, "step": 10385 }, { "epoch": 0.3719447776962057, "grad_norm": 2.1284492015838623, "learning_rate": 0.00014469228496901727, "loss": 1.5545, "step": 10386 }, { "epoch": 0.371980589825774, "grad_norm": 1.978696584701538, "learning_rate": 0.00014468190851034656, "loss": 1.6813, "step": 10387 }, { "epoch": 0.3720164019553423, "grad_norm": 1.5713815689086914, "learning_rate": 0.0001446715314505384, "loss": 1.6417, "step": 10388 }, { "epoch": 0.37205221408491057, "grad_norm": 1.5084996223449707, "learning_rate": 0.00014466115378973236, "loss": 1.4994, "step": 10389 }, { "epoch": 0.37208802621447884, "grad_norm": 1.6603682041168213, "learning_rate": 0.00014465077552806813, "loss": 1.8155, "step": 10390 }, { "epoch": 0.3721238383440471, "grad_norm": 1.6536749601364136, "learning_rate": 0.00014464039666568532, "loss": 1.1957, "step": 10391 }, { "epoch": 0.37215965047361543, "grad_norm": 1.334580659866333, "learning_rate": 0.00014463001720272357, "loss": 1.4941, "step": 10392 }, { "epoch": 0.3721954626031837, "grad_norm": 1.4311217069625854, "learning_rate": 0.00014461963713932247, "loss": 1.3092, "step": 10393 }, { "epoch": 0.37223127473275197, "grad_norm": 1.9430100917816162, "learning_rate": 0.00014460925647562174, "loss": 1.2872, "step": 10394 }, { "epoch": 0.3722670868623203, "grad_norm": 1.8805584907531738, "learning_rate": 0.000144598875211761, "loss": 1.7865, "step": 10395 }, { "epoch": 0.37230289899188856, "grad_norm": 1.2648087739944458, "learning_rate": 0.00014458849334787993, "loss": 1.4374, "step": 10396 }, { "epoch": 0.37233871112145683, "grad_norm": 1.6092779636383057, "learning_rate": 0.00014457811088411816, "loss": 1.559, "step": 10397 }, { "epoch": 0.3723745232510251, "grad_norm": 1.3519525527954102, "learning_rate": 0.00014456772782061545, "loss": 1.2963, "step": 10398 }, { "epoch": 0.3724103353805934, "grad_norm": 2.0180888175964355, "learning_rate": 0.00014455734415751143, "loss": 1.574, "step": 10399 }, { "epoch": 0.3724461475101617, "grad_norm": 1.8862159252166748, "learning_rate": 0.00014454695989494582, "loss": 1.651, "step": 10400 }, { "epoch": 0.37248195963972996, "grad_norm": 1.7211228609085083, "learning_rate": 0.00014453657503305832, "loss": 1.3319, "step": 10401 }, { "epoch": 0.37251777176929823, "grad_norm": 2.443127155303955, "learning_rate": 0.00014452618957198866, "loss": 1.4359, "step": 10402 }, { "epoch": 0.37255358389886656, "grad_norm": 1.24772047996521, "learning_rate": 0.00014451580351187656, "loss": 1.6141, "step": 10403 }, { "epoch": 0.3725893960284348, "grad_norm": 1.9694045782089233, "learning_rate": 0.00014450541685286173, "loss": 1.6621, "step": 10404 }, { "epoch": 0.3726252081580031, "grad_norm": 1.6941626071929932, "learning_rate": 0.00014449502959508394, "loss": 1.6966, "step": 10405 }, { "epoch": 0.3726610202875714, "grad_norm": 1.5425902605056763, "learning_rate": 0.00014448464173868293, "loss": 1.4486, "step": 10406 }, { "epoch": 0.3726968324171397, "grad_norm": 1.4285595417022705, "learning_rate": 0.00014447425328379843, "loss": 1.0123, "step": 10407 }, { "epoch": 0.37273264454670796, "grad_norm": 1.7585430145263672, "learning_rate": 0.00014446386423057022, "loss": 1.2697, "step": 10408 }, { "epoch": 0.3727684566762762, "grad_norm": 2.169542074203491, "learning_rate": 0.00014445347457913807, "loss": 1.3233, "step": 10409 }, { "epoch": 0.37280426880584455, "grad_norm": 1.826337456703186, "learning_rate": 0.00014444308432964175, "loss": 1.2875, "step": 10410 }, { "epoch": 0.3728400809354128, "grad_norm": 2.5263783931732178, "learning_rate": 0.00014443269348222109, "loss": 1.7066, "step": 10411 }, { "epoch": 0.3728758930649811, "grad_norm": 1.9127196073532104, "learning_rate": 0.00014442230203701582, "loss": 1.4219, "step": 10412 }, { "epoch": 0.3729117051945494, "grad_norm": 1.6260210275650024, "learning_rate": 0.0001444119099941658, "loss": 1.5465, "step": 10413 }, { "epoch": 0.3729475173241177, "grad_norm": 1.7325187921524048, "learning_rate": 0.0001444015173538108, "loss": 1.5095, "step": 10414 }, { "epoch": 0.37298332945368595, "grad_norm": 1.852473258972168, "learning_rate": 0.00014439112411609065, "loss": 1.4091, "step": 10415 }, { "epoch": 0.3730191415832542, "grad_norm": 2.1807992458343506, "learning_rate": 0.00014438073028114523, "loss": 1.3722, "step": 10416 }, { "epoch": 0.37305495371282255, "grad_norm": 1.6915746927261353, "learning_rate": 0.00014437033584911428, "loss": 1.638, "step": 10417 }, { "epoch": 0.3730907658423908, "grad_norm": 2.2476296424865723, "learning_rate": 0.00014435994082013772, "loss": 1.4248, "step": 10418 }, { "epoch": 0.3731265779719591, "grad_norm": 1.4829312562942505, "learning_rate": 0.00014434954519435537, "loss": 1.599, "step": 10419 }, { "epoch": 0.3731623901015274, "grad_norm": 2.0151619911193848, "learning_rate": 0.0001443391489719071, "loss": 1.3809, "step": 10420 }, { "epoch": 0.3731982022310957, "grad_norm": 2.2441935539245605, "learning_rate": 0.0001443287521529328, "loss": 1.5466, "step": 10421 }, { "epoch": 0.37323401436066395, "grad_norm": 1.6749489307403564, "learning_rate": 0.00014431835473757227, "loss": 1.1317, "step": 10422 }, { "epoch": 0.3732698264902322, "grad_norm": 1.493473768234253, "learning_rate": 0.0001443079567259655, "loss": 1.5646, "step": 10423 }, { "epoch": 0.37330563861980054, "grad_norm": 2.4875717163085938, "learning_rate": 0.00014429755811825226, "loss": 1.505, "step": 10424 }, { "epoch": 0.3733414507493688, "grad_norm": 1.3609141111373901, "learning_rate": 0.00014428715891457255, "loss": 1.3917, "step": 10425 }, { "epoch": 0.3733772628789371, "grad_norm": 1.8194928169250488, "learning_rate": 0.00014427675911506623, "loss": 1.3385, "step": 10426 }, { "epoch": 0.3734130750085054, "grad_norm": 1.7996721267700195, "learning_rate": 0.00014426635871987327, "loss": 1.5479, "step": 10427 }, { "epoch": 0.37344888713807367, "grad_norm": 2.608707904815674, "learning_rate": 0.0001442559577291335, "loss": 1.3896, "step": 10428 }, { "epoch": 0.37348469926764194, "grad_norm": 1.6226502656936646, "learning_rate": 0.00014424555614298693, "loss": 1.4873, "step": 10429 }, { "epoch": 0.3735205113972102, "grad_norm": 1.5772932767868042, "learning_rate": 0.0001442351539615735, "loss": 1.2307, "step": 10430 }, { "epoch": 0.37355632352677853, "grad_norm": 1.3217772245407104, "learning_rate": 0.00014422475118503307, "loss": 1.756, "step": 10431 }, { "epoch": 0.3735921356563468, "grad_norm": 1.4471333026885986, "learning_rate": 0.0001442143478135057, "loss": 1.3107, "step": 10432 }, { "epoch": 0.37362794778591507, "grad_norm": 1.474479079246521, "learning_rate": 0.00014420394384713129, "loss": 1.5484, "step": 10433 }, { "epoch": 0.3736637599154834, "grad_norm": 1.8751987218856812, "learning_rate": 0.00014419353928604988, "loss": 1.4065, "step": 10434 }, { "epoch": 0.37369957204505166, "grad_norm": 2.282353639602661, "learning_rate": 0.00014418313413040138, "loss": 1.1441, "step": 10435 }, { "epoch": 0.37373538417461993, "grad_norm": 2.0015993118286133, "learning_rate": 0.00014417272838032578, "loss": 1.5908, "step": 10436 }, { "epoch": 0.3737711963041882, "grad_norm": 2.5349526405334473, "learning_rate": 0.00014416232203596312, "loss": 1.5447, "step": 10437 }, { "epoch": 0.3738070084337565, "grad_norm": 1.7229876518249512, "learning_rate": 0.00014415191509745338, "loss": 1.3676, "step": 10438 }, { "epoch": 0.3738428205633248, "grad_norm": 2.226806640625, "learning_rate": 0.0001441415075649366, "loss": 1.4815, "step": 10439 }, { "epoch": 0.37387863269289306, "grad_norm": 3.008434772491455, "learning_rate": 0.00014413109943855275, "loss": 1.2429, "step": 10440 }, { "epoch": 0.3739144448224614, "grad_norm": 1.4525420665740967, "learning_rate": 0.00014412069071844186, "loss": 1.4308, "step": 10441 }, { "epoch": 0.37395025695202966, "grad_norm": 1.8240820169448853, "learning_rate": 0.00014411028140474402, "loss": 1.6106, "step": 10442 }, { "epoch": 0.3739860690815979, "grad_norm": 1.7192800045013428, "learning_rate": 0.0001440998714975992, "loss": 1.6079, "step": 10443 }, { "epoch": 0.3740218812111662, "grad_norm": 1.9115632772445679, "learning_rate": 0.00014408946099714754, "loss": 1.6781, "step": 10444 }, { "epoch": 0.3740576933407345, "grad_norm": 1.8034567832946777, "learning_rate": 0.00014407904990352904, "loss": 1.4781, "step": 10445 }, { "epoch": 0.3740935054703028, "grad_norm": 1.9719936847686768, "learning_rate": 0.00014406863821688374, "loss": 1.7442, "step": 10446 }, { "epoch": 0.37412931759987106, "grad_norm": 1.7410471439361572, "learning_rate": 0.00014405822593735183, "loss": 1.6857, "step": 10447 }, { "epoch": 0.3741651297294394, "grad_norm": 1.7776700258255005, "learning_rate": 0.0001440478130650733, "loss": 1.1856, "step": 10448 }, { "epoch": 0.37420094185900765, "grad_norm": 1.678805947303772, "learning_rate": 0.00014403739960018824, "loss": 1.3772, "step": 10449 }, { "epoch": 0.3742367539885759, "grad_norm": 1.3966861963272095, "learning_rate": 0.00014402698554283675, "loss": 1.6829, "step": 10450 }, { "epoch": 0.3742725661181442, "grad_norm": 2.3683388233184814, "learning_rate": 0.00014401657089315904, "loss": 1.6944, "step": 10451 }, { "epoch": 0.3743083782477125, "grad_norm": 2.4242324829101562, "learning_rate": 0.00014400615565129507, "loss": 1.4101, "step": 10452 }, { "epoch": 0.3743441903772808, "grad_norm": 2.579834461212158, "learning_rate": 0.00014399573981738507, "loss": 1.3847, "step": 10453 }, { "epoch": 0.37438000250684905, "grad_norm": 1.703393578529358, "learning_rate": 0.00014398532339156912, "loss": 1.4959, "step": 10454 }, { "epoch": 0.3744158146364174, "grad_norm": 2.0507161617279053, "learning_rate": 0.00014397490637398742, "loss": 1.2748, "step": 10455 }, { "epoch": 0.37445162676598565, "grad_norm": 1.2460108995437622, "learning_rate": 0.00014396448876478007, "loss": 1.5361, "step": 10456 }, { "epoch": 0.3744874388955539, "grad_norm": 1.6622123718261719, "learning_rate": 0.00014395407056408722, "loss": 1.5679, "step": 10457 }, { "epoch": 0.3745232510251222, "grad_norm": 2.1253249645233154, "learning_rate": 0.00014394365177204904, "loss": 1.7294, "step": 10458 }, { "epoch": 0.3745590631546905, "grad_norm": 2.055772542953491, "learning_rate": 0.00014393323238880571, "loss": 1.2525, "step": 10459 }, { "epoch": 0.3745948752842588, "grad_norm": 1.6590421199798584, "learning_rate": 0.00014392281241449743, "loss": 1.4193, "step": 10460 }, { "epoch": 0.37463068741382705, "grad_norm": 1.8132797479629517, "learning_rate": 0.00014391239184926433, "loss": 1.4351, "step": 10461 }, { "epoch": 0.37466649954339537, "grad_norm": 1.3465622663497925, "learning_rate": 0.00014390197069324667, "loss": 1.6417, "step": 10462 }, { "epoch": 0.37470231167296364, "grad_norm": 1.4198817014694214, "learning_rate": 0.0001438915489465846, "loss": 1.3433, "step": 10463 }, { "epoch": 0.3747381238025319, "grad_norm": 2.0465641021728516, "learning_rate": 0.0001438811266094184, "loss": 1.8104, "step": 10464 }, { "epoch": 0.3747739359321002, "grad_norm": 1.4621689319610596, "learning_rate": 0.0001438707036818882, "loss": 1.5587, "step": 10465 }, { "epoch": 0.3748097480616685, "grad_norm": 1.6371957063674927, "learning_rate": 0.00014386028016413426, "loss": 1.4263, "step": 10466 }, { "epoch": 0.37484556019123677, "grad_norm": 2.203371286392212, "learning_rate": 0.00014384985605629685, "loss": 1.5286, "step": 10467 }, { "epoch": 0.37488137232080504, "grad_norm": 1.7377517223358154, "learning_rate": 0.0001438394313585162, "loss": 1.3761, "step": 10468 }, { "epoch": 0.37491718445037336, "grad_norm": 1.7066103219985962, "learning_rate": 0.00014382900607093254, "loss": 1.6471, "step": 10469 }, { "epoch": 0.37495299657994163, "grad_norm": 3.365816593170166, "learning_rate": 0.00014381858019368613, "loss": 1.805, "step": 10470 }, { "epoch": 0.3749888087095099, "grad_norm": 1.8790528774261475, "learning_rate": 0.00014380815372691728, "loss": 1.3121, "step": 10471 }, { "epoch": 0.37502462083907817, "grad_norm": 1.5260752439498901, "learning_rate": 0.00014379772667076618, "loss": 1.7436, "step": 10472 }, { "epoch": 0.3750604329686465, "grad_norm": 1.6847316026687622, "learning_rate": 0.0001437872990253732, "loss": 1.5917, "step": 10473 }, { "epoch": 0.37509624509821476, "grad_norm": 1.5153547525405884, "learning_rate": 0.0001437768707908786, "loss": 1.5685, "step": 10474 }, { "epoch": 0.37513205722778303, "grad_norm": 1.3384404182434082, "learning_rate": 0.00014376644196742263, "loss": 1.3586, "step": 10475 }, { "epoch": 0.37516786935735136, "grad_norm": 1.3099168539047241, "learning_rate": 0.00014375601255514565, "loss": 1.1538, "step": 10476 }, { "epoch": 0.3752036814869196, "grad_norm": 2.159562110900879, "learning_rate": 0.00014374558255418797, "loss": 1.3775, "step": 10477 }, { "epoch": 0.3752394936164879, "grad_norm": 1.8578730821609497, "learning_rate": 0.00014373515196468991, "loss": 1.4176, "step": 10478 }, { "epoch": 0.37527530574605616, "grad_norm": 1.4514464139938354, "learning_rate": 0.00014372472078679177, "loss": 1.2886, "step": 10479 }, { "epoch": 0.3753111178756245, "grad_norm": 2.7915217876434326, "learning_rate": 0.00014371428902063395, "loss": 1.6081, "step": 10480 }, { "epoch": 0.37534693000519276, "grad_norm": 1.7059495449066162, "learning_rate": 0.00014370385666635674, "loss": 1.3397, "step": 10481 }, { "epoch": 0.375382742134761, "grad_norm": 1.5151960849761963, "learning_rate": 0.00014369342372410053, "loss": 1.4587, "step": 10482 }, { "epoch": 0.37541855426432935, "grad_norm": 1.5763031244277954, "learning_rate": 0.00014368299019400563, "loss": 1.0639, "step": 10483 }, { "epoch": 0.3754543663938976, "grad_norm": 1.905612587928772, "learning_rate": 0.0001436725560762125, "loss": 1.7506, "step": 10484 }, { "epoch": 0.3754901785234659, "grad_norm": 1.5665966272354126, "learning_rate": 0.0001436621213708614, "loss": 1.4628, "step": 10485 }, { "epoch": 0.37552599065303416, "grad_norm": 1.8243300914764404, "learning_rate": 0.0001436516860780928, "loss": 1.5423, "step": 10486 }, { "epoch": 0.3755618027826025, "grad_norm": 1.7763745784759521, "learning_rate": 0.00014364125019804708, "loss": 1.8964, "step": 10487 }, { "epoch": 0.37559761491217075, "grad_norm": 2.380751848220825, "learning_rate": 0.00014363081373086462, "loss": 1.7111, "step": 10488 }, { "epoch": 0.375633427041739, "grad_norm": 2.1618824005126953, "learning_rate": 0.00014362037667668584, "loss": 1.2592, "step": 10489 }, { "epoch": 0.37566923917130735, "grad_norm": 2.174567222595215, "learning_rate": 0.00014360993903565116, "loss": 1.8956, "step": 10490 }, { "epoch": 0.3757050513008756, "grad_norm": 1.3785721063613892, "learning_rate": 0.00014359950080790101, "loss": 1.5306, "step": 10491 }, { "epoch": 0.3757408634304439, "grad_norm": 1.9032706022262573, "learning_rate": 0.0001435890619935758, "loss": 1.3604, "step": 10492 }, { "epoch": 0.37577667556001215, "grad_norm": 1.70711088180542, "learning_rate": 0.00014357862259281603, "loss": 1.7252, "step": 10493 }, { "epoch": 0.3758124876895805, "grad_norm": 1.943627119064331, "learning_rate": 0.00014356818260576206, "loss": 1.4448, "step": 10494 }, { "epoch": 0.37584829981914875, "grad_norm": 1.4574229717254639, "learning_rate": 0.0001435577420325544, "loss": 1.5815, "step": 10495 }, { "epoch": 0.375884111948717, "grad_norm": 1.5903644561767578, "learning_rate": 0.0001435473008733335, "loss": 1.5568, "step": 10496 }, { "epoch": 0.37591992407828534, "grad_norm": 1.8567113876342773, "learning_rate": 0.00014353685912823987, "loss": 1.2216, "step": 10497 }, { "epoch": 0.3759557362078536, "grad_norm": 1.382388949394226, "learning_rate": 0.00014352641679741393, "loss": 1.6755, "step": 10498 }, { "epoch": 0.3759915483374219, "grad_norm": 1.329992651939392, "learning_rate": 0.0001435159738809962, "loss": 1.2925, "step": 10499 }, { "epoch": 0.37602736046699015, "grad_norm": 2.657209873199463, "learning_rate": 0.0001435055303791272, "loss": 1.673, "step": 10500 }, { "epoch": 0.37606317259655847, "grad_norm": 1.9933116436004639, "learning_rate": 0.00014349508629194738, "loss": 1.4519, "step": 10501 }, { "epoch": 0.37609898472612674, "grad_norm": 1.554787516593933, "learning_rate": 0.00014348464161959728, "loss": 1.6938, "step": 10502 }, { "epoch": 0.376134796855695, "grad_norm": 1.7749775648117065, "learning_rate": 0.0001434741963622174, "loss": 1.3281, "step": 10503 }, { "epoch": 0.37617060898526333, "grad_norm": 1.529089331626892, "learning_rate": 0.00014346375051994833, "loss": 1.3901, "step": 10504 }, { "epoch": 0.3762064211148316, "grad_norm": 1.5324797630310059, "learning_rate": 0.00014345330409293053, "loss": 1.4895, "step": 10505 }, { "epoch": 0.37624223324439987, "grad_norm": 1.7260066270828247, "learning_rate": 0.0001434428570813046, "loss": 1.5439, "step": 10506 }, { "epoch": 0.37627804537396814, "grad_norm": 1.5831547975540161, "learning_rate": 0.00014343240948521104, "loss": 1.4157, "step": 10507 }, { "epoch": 0.37631385750353646, "grad_norm": 1.6812795400619507, "learning_rate": 0.00014342196130479043, "loss": 1.6985, "step": 10508 }, { "epoch": 0.37634966963310473, "grad_norm": 1.7851736545562744, "learning_rate": 0.0001434115125401834, "loss": 1.6871, "step": 10509 }, { "epoch": 0.376385481762673, "grad_norm": 1.9314453601837158, "learning_rate": 0.00014340106319153038, "loss": 1.6787, "step": 10510 }, { "epoch": 0.3764212938922413, "grad_norm": 1.592041254043579, "learning_rate": 0.0001433906132589721, "loss": 1.3062, "step": 10511 }, { "epoch": 0.3764571060218096, "grad_norm": 1.572988510131836, "learning_rate": 0.00014338016274264905, "loss": 1.7478, "step": 10512 }, { "epoch": 0.37649291815137786, "grad_norm": 2.4121146202087402, "learning_rate": 0.0001433697116427019, "loss": 1.6346, "step": 10513 }, { "epoch": 0.37652873028094613, "grad_norm": 1.4479660987854004, "learning_rate": 0.0001433592599592712, "loss": 1.4558, "step": 10514 }, { "epoch": 0.37656454241051446, "grad_norm": 1.7201353311538696, "learning_rate": 0.00014334880769249758, "loss": 1.4518, "step": 10515 }, { "epoch": 0.3766003545400827, "grad_norm": 1.6340991258621216, "learning_rate": 0.00014333835484252167, "loss": 1.6044, "step": 10516 }, { "epoch": 0.376636166669651, "grad_norm": 1.3527189493179321, "learning_rate": 0.00014332790140948414, "loss": 1.4002, "step": 10517 }, { "epoch": 0.3766719787992193, "grad_norm": 2.4372007846832275, "learning_rate": 0.00014331744739352556, "loss": 1.747, "step": 10518 }, { "epoch": 0.3767077909287876, "grad_norm": 1.1843258142471313, "learning_rate": 0.0001433069927947866, "loss": 1.5292, "step": 10519 }, { "epoch": 0.37674360305835586, "grad_norm": 2.6395835876464844, "learning_rate": 0.0001432965376134079, "loss": 1.8212, "step": 10520 }, { "epoch": 0.3767794151879241, "grad_norm": 1.5669384002685547, "learning_rate": 0.00014328608184953012, "loss": 1.3947, "step": 10521 }, { "epoch": 0.37681522731749245, "grad_norm": 1.5199379920959473, "learning_rate": 0.000143275625503294, "loss": 1.4586, "step": 10522 }, { "epoch": 0.3768510394470607, "grad_norm": 1.574935793876648, "learning_rate": 0.0001432651685748401, "loss": 1.4656, "step": 10523 }, { "epoch": 0.376886851576629, "grad_norm": 1.8273627758026123, "learning_rate": 0.0001432547110643092, "loss": 1.596, "step": 10524 }, { "epoch": 0.3769226637061973, "grad_norm": 1.7247346639633179, "learning_rate": 0.00014324425297184193, "loss": 1.555, "step": 10525 }, { "epoch": 0.3769584758357656, "grad_norm": 1.646228551864624, "learning_rate": 0.00014323379429757906, "loss": 1.6124, "step": 10526 }, { "epoch": 0.37699428796533385, "grad_norm": 1.649630069732666, "learning_rate": 0.00014322333504166124, "loss": 1.8379, "step": 10527 }, { "epoch": 0.3770301000949021, "grad_norm": 2.7177631855010986, "learning_rate": 0.00014321287520422917, "loss": 1.3449, "step": 10528 }, { "epoch": 0.37706591222447045, "grad_norm": 1.322988748550415, "learning_rate": 0.00014320241478542363, "loss": 1.4162, "step": 10529 }, { "epoch": 0.3771017243540387, "grad_norm": 2.104003429412842, "learning_rate": 0.0001431919537853853, "loss": 1.7102, "step": 10530 }, { "epoch": 0.377137536483607, "grad_norm": 1.8676283359527588, "learning_rate": 0.000143181492204255, "loss": 1.4098, "step": 10531 }, { "epoch": 0.3771733486131753, "grad_norm": 1.5684523582458496, "learning_rate": 0.0001431710300421734, "loss": 1.5385, "step": 10532 }, { "epoch": 0.3772091607427436, "grad_norm": 1.7146109342575073, "learning_rate": 0.00014316056729928126, "loss": 1.7981, "step": 10533 }, { "epoch": 0.37724497287231185, "grad_norm": 1.6780128479003906, "learning_rate": 0.00014315010397571937, "loss": 1.4944, "step": 10534 }, { "epoch": 0.3772807850018801, "grad_norm": 3.0013227462768555, "learning_rate": 0.0001431396400716285, "loss": 2.0381, "step": 10535 }, { "epoch": 0.37731659713144844, "grad_norm": 1.5567467212677002, "learning_rate": 0.00014312917558714943, "loss": 1.4914, "step": 10536 }, { "epoch": 0.3773524092610167, "grad_norm": 1.4302515983581543, "learning_rate": 0.00014311871052242293, "loss": 1.5742, "step": 10537 }, { "epoch": 0.377388221390585, "grad_norm": 2.07220196723938, "learning_rate": 0.00014310824487758975, "loss": 1.5267, "step": 10538 }, { "epoch": 0.3774240335201533, "grad_norm": 1.5602160692214966, "learning_rate": 0.00014309777865279078, "loss": 1.1705, "step": 10539 }, { "epoch": 0.37745984564972157, "grad_norm": 1.5262706279754639, "learning_rate": 0.00014308731184816678, "loss": 1.6653, "step": 10540 }, { "epoch": 0.37749565777928984, "grad_norm": 1.5797264575958252, "learning_rate": 0.00014307684446385855, "loss": 1.4342, "step": 10541 }, { "epoch": 0.3775314699088581, "grad_norm": 1.70090651512146, "learning_rate": 0.000143066376500007, "loss": 1.7305, "step": 10542 }, { "epoch": 0.37756728203842643, "grad_norm": 1.5267583131790161, "learning_rate": 0.00014305590795675286, "loss": 1.5904, "step": 10543 }, { "epoch": 0.3776030941679947, "grad_norm": 1.5385205745697021, "learning_rate": 0.00014304543883423708, "loss": 1.7465, "step": 10544 }, { "epoch": 0.37763890629756297, "grad_norm": 2.824392795562744, "learning_rate": 0.0001430349691326004, "loss": 1.4748, "step": 10545 }, { "epoch": 0.3776747184271313, "grad_norm": 1.8800132274627686, "learning_rate": 0.00014302449885198373, "loss": 1.4558, "step": 10546 }, { "epoch": 0.37771053055669956, "grad_norm": 1.483883023262024, "learning_rate": 0.00014301402799252793, "loss": 1.384, "step": 10547 }, { "epoch": 0.37774634268626783, "grad_norm": 1.6904969215393066, "learning_rate": 0.00014300355655437385, "loss": 1.6016, "step": 10548 }, { "epoch": 0.3777821548158361, "grad_norm": 1.5325121879577637, "learning_rate": 0.00014299308453766238, "loss": 1.419, "step": 10549 }, { "epoch": 0.3778179669454044, "grad_norm": 1.5163652896881104, "learning_rate": 0.00014298261194253443, "loss": 1.6518, "step": 10550 }, { "epoch": 0.3778537790749727, "grad_norm": 1.606885313987732, "learning_rate": 0.00014297213876913087, "loss": 1.6129, "step": 10551 }, { "epoch": 0.37788959120454096, "grad_norm": 2.0452921390533447, "learning_rate": 0.00014296166501759263, "loss": 1.5371, "step": 10552 }, { "epoch": 0.3779254033341093, "grad_norm": 1.4756042957305908, "learning_rate": 0.00014295119068806063, "loss": 1.4163, "step": 10553 }, { "epoch": 0.37796121546367756, "grad_norm": 1.4416539669036865, "learning_rate": 0.00014294071578067568, "loss": 1.5408, "step": 10554 }, { "epoch": 0.3779970275932458, "grad_norm": 1.5078434944152832, "learning_rate": 0.00014293024029557886, "loss": 1.2568, "step": 10555 }, { "epoch": 0.3780328397228141, "grad_norm": 1.6258286237716675, "learning_rate": 0.000142919764232911, "loss": 1.8241, "step": 10556 }, { "epoch": 0.3780686518523824, "grad_norm": 1.5990259647369385, "learning_rate": 0.0001429092875928131, "loss": 1.4475, "step": 10557 }, { "epoch": 0.3781044639819507, "grad_norm": 1.7794666290283203, "learning_rate": 0.00014289881037542605, "loss": 1.5112, "step": 10558 }, { "epoch": 0.37814027611151896, "grad_norm": 1.70427668094635, "learning_rate": 0.00014288833258089086, "loss": 1.4345, "step": 10559 }, { "epoch": 0.3781760882410873, "grad_norm": 1.3057318925857544, "learning_rate": 0.00014287785420934846, "loss": 1.4043, "step": 10560 }, { "epoch": 0.37821190037065555, "grad_norm": 1.8462861776351929, "learning_rate": 0.0001428673752609399, "loss": 1.131, "step": 10561 }, { "epoch": 0.3782477125002238, "grad_norm": 1.4198931455612183, "learning_rate": 0.00014285689573580607, "loss": 1.5534, "step": 10562 }, { "epoch": 0.3782835246297921, "grad_norm": 1.9457460641860962, "learning_rate": 0.00014284641563408796, "loss": 1.711, "step": 10563 }, { "epoch": 0.3783193367593604, "grad_norm": 1.85354483127594, "learning_rate": 0.00014283593495592663, "loss": 1.6055, "step": 10564 }, { "epoch": 0.3783551488889287, "grad_norm": 3.7298476696014404, "learning_rate": 0.000142825453701463, "loss": 1.5535, "step": 10565 }, { "epoch": 0.37839096101849695, "grad_norm": 1.84163498878479, "learning_rate": 0.00014281497187083818, "loss": 1.3187, "step": 10566 }, { "epoch": 0.3784267731480653, "grad_norm": 1.7626897096633911, "learning_rate": 0.00014280448946419312, "loss": 1.4398, "step": 10567 }, { "epoch": 0.37846258527763355, "grad_norm": 1.497876524925232, "learning_rate": 0.0001427940064816689, "loss": 1.5085, "step": 10568 }, { "epoch": 0.3784983974072018, "grad_norm": 2.1282432079315186, "learning_rate": 0.00014278352292340646, "loss": 1.3299, "step": 10569 }, { "epoch": 0.3785342095367701, "grad_norm": 1.8996422290802002, "learning_rate": 0.00014277303878954694, "loss": 1.7137, "step": 10570 }, { "epoch": 0.3785700216663384, "grad_norm": 2.442035675048828, "learning_rate": 0.00014276255408023138, "loss": 1.4634, "step": 10571 }, { "epoch": 0.3786058337959067, "grad_norm": 1.3051122426986694, "learning_rate": 0.00014275206879560079, "loss": 1.4763, "step": 10572 }, { "epoch": 0.37864164592547495, "grad_norm": 1.575643539428711, "learning_rate": 0.00014274158293579628, "loss": 1.7568, "step": 10573 }, { "epoch": 0.37867745805504327, "grad_norm": 1.7613773345947266, "learning_rate": 0.00014273109650095886, "loss": 1.4811, "step": 10574 }, { "epoch": 0.37871327018461154, "grad_norm": 1.501792073249817, "learning_rate": 0.0001427206094912297, "loss": 1.4062, "step": 10575 }, { "epoch": 0.3787490823141798, "grad_norm": 1.534451961517334, "learning_rate": 0.00014271012190674983, "loss": 1.3444, "step": 10576 }, { "epoch": 0.3787848944437481, "grad_norm": 1.2604267597198486, "learning_rate": 0.00014269963374766034, "loss": 1.2716, "step": 10577 }, { "epoch": 0.3788207065733164, "grad_norm": 2.2848007678985596, "learning_rate": 0.00014268914501410239, "loss": 1.5, "step": 10578 }, { "epoch": 0.37885651870288467, "grad_norm": 1.4307903051376343, "learning_rate": 0.00014267865570621706, "loss": 1.5404, "step": 10579 }, { "epoch": 0.37889233083245294, "grad_norm": 2.649573802947998, "learning_rate": 0.00014266816582414547, "loss": 1.4351, "step": 10580 }, { "epoch": 0.37892814296202126, "grad_norm": 2.3270211219787598, "learning_rate": 0.00014265767536802873, "loss": 1.4689, "step": 10581 }, { "epoch": 0.37896395509158953, "grad_norm": 1.3619784116744995, "learning_rate": 0.000142647184338008, "loss": 1.5399, "step": 10582 }, { "epoch": 0.3789997672211578, "grad_norm": 1.9002552032470703, "learning_rate": 0.0001426366927342244, "loss": 1.6398, "step": 10583 }, { "epoch": 0.37903557935072607, "grad_norm": 1.818941593170166, "learning_rate": 0.0001426262005568191, "loss": 1.3925, "step": 10584 }, { "epoch": 0.3790713914802944, "grad_norm": 1.4739636182785034, "learning_rate": 0.00014261570780593327, "loss": 1.4677, "step": 10585 }, { "epoch": 0.37910720360986266, "grad_norm": 1.7951068878173828, "learning_rate": 0.00014260521448170805, "loss": 1.5913, "step": 10586 }, { "epoch": 0.37914301573943093, "grad_norm": 1.6509017944335938, "learning_rate": 0.0001425947205842846, "loss": 1.3659, "step": 10587 }, { "epoch": 0.37917882786899926, "grad_norm": 1.6819849014282227, "learning_rate": 0.00014258422611380418, "loss": 1.6058, "step": 10588 }, { "epoch": 0.3792146399985675, "grad_norm": 2.2275047302246094, "learning_rate": 0.0001425737310704079, "loss": 1.1905, "step": 10589 }, { "epoch": 0.3792504521281358, "grad_norm": 1.8153003454208374, "learning_rate": 0.000142563235454237, "loss": 1.8863, "step": 10590 }, { "epoch": 0.37928626425770406, "grad_norm": 1.4210957288742065, "learning_rate": 0.00014255273926543264, "loss": 1.4082, "step": 10591 }, { "epoch": 0.3793220763872724, "grad_norm": 1.9765852689743042, "learning_rate": 0.0001425422425041361, "loss": 1.7252, "step": 10592 }, { "epoch": 0.37935788851684066, "grad_norm": 1.3808059692382812, "learning_rate": 0.00014253174517048854, "loss": 1.3727, "step": 10593 }, { "epoch": 0.3793937006464089, "grad_norm": 1.5124943256378174, "learning_rate": 0.00014252124726463121, "loss": 1.456, "step": 10594 }, { "epoch": 0.37942951277597725, "grad_norm": 1.4630928039550781, "learning_rate": 0.00014251074878670537, "loss": 1.6548, "step": 10595 }, { "epoch": 0.3794653249055455, "grad_norm": 2.346756935119629, "learning_rate": 0.00014250024973685218, "loss": 1.6495, "step": 10596 }, { "epoch": 0.3795011370351138, "grad_norm": 1.8365200757980347, "learning_rate": 0.000142489750115213, "loss": 1.4613, "step": 10597 }, { "epoch": 0.37953694916468206, "grad_norm": 1.351531982421875, "learning_rate": 0.00014247924992192906, "loss": 1.497, "step": 10598 }, { "epoch": 0.3795727612942504, "grad_norm": 1.3425579071044922, "learning_rate": 0.00014246874915714157, "loss": 1.0996, "step": 10599 }, { "epoch": 0.37960857342381865, "grad_norm": 1.2506022453308105, "learning_rate": 0.00014245824782099185, "loss": 1.3726, "step": 10600 }, { "epoch": 0.3796443855533869, "grad_norm": 1.398263692855835, "learning_rate": 0.00014244774591362118, "loss": 1.4526, "step": 10601 }, { "epoch": 0.3796801976829552, "grad_norm": 1.5792254209518433, "learning_rate": 0.00014243724343517082, "loss": 1.4306, "step": 10602 }, { "epoch": 0.3797160098125235, "grad_norm": 1.728993535041809, "learning_rate": 0.0001424267403857821, "loss": 1.2494, "step": 10603 }, { "epoch": 0.3797518219420918, "grad_norm": 1.8500906229019165, "learning_rate": 0.00014241623676559633, "loss": 1.2789, "step": 10604 }, { "epoch": 0.37978763407166005, "grad_norm": 2.3870582580566406, "learning_rate": 0.0001424057325747548, "loss": 1.5109, "step": 10605 }, { "epoch": 0.3798234462012284, "grad_norm": 2.648805856704712, "learning_rate": 0.00014239522781339884, "loss": 1.3092, "step": 10606 }, { "epoch": 0.37985925833079665, "grad_norm": 1.6133131980895996, "learning_rate": 0.00014238472248166977, "loss": 1.5906, "step": 10607 }, { "epoch": 0.3798950704603649, "grad_norm": 1.9158686399459839, "learning_rate": 0.00014237421657970894, "loss": 1.7831, "step": 10608 }, { "epoch": 0.3799308825899332, "grad_norm": 1.773206353187561, "learning_rate": 0.00014236371010765766, "loss": 1.4976, "step": 10609 }, { "epoch": 0.3799666947195015, "grad_norm": 1.8020765781402588, "learning_rate": 0.00014235320306565732, "loss": 1.656, "step": 10610 }, { "epoch": 0.3800025068490698, "grad_norm": 1.7115094661712646, "learning_rate": 0.00014234269545384927, "loss": 1.7404, "step": 10611 }, { "epoch": 0.38003831897863805, "grad_norm": 1.5301717519760132, "learning_rate": 0.00014233218727237489, "loss": 1.6062, "step": 10612 }, { "epoch": 0.38007413110820637, "grad_norm": 1.6753218173980713, "learning_rate": 0.00014232167852137547, "loss": 1.6029, "step": 10613 }, { "epoch": 0.38010994323777464, "grad_norm": 1.724470853805542, "learning_rate": 0.00014231116920099252, "loss": 1.4289, "step": 10614 }, { "epoch": 0.3801457553673429, "grad_norm": 1.236194133758545, "learning_rate": 0.00014230065931136735, "loss": 1.4111, "step": 10615 }, { "epoch": 0.3801815674969112, "grad_norm": 2.511920213699341, "learning_rate": 0.00014229014885264136, "loss": 1.4207, "step": 10616 }, { "epoch": 0.3802173796264795, "grad_norm": 1.4968230724334717, "learning_rate": 0.00014227963782495598, "loss": 1.5417, "step": 10617 }, { "epoch": 0.38025319175604777, "grad_norm": 1.8402019739151, "learning_rate": 0.0001422691262284526, "loss": 1.5169, "step": 10618 }, { "epoch": 0.38028900388561604, "grad_norm": 1.912214756011963, "learning_rate": 0.00014225861406327265, "loss": 1.5186, "step": 10619 }, { "epoch": 0.38032481601518436, "grad_norm": 1.7003743648529053, "learning_rate": 0.00014224810132955755, "loss": 1.4935, "step": 10620 }, { "epoch": 0.38036062814475263, "grad_norm": 1.291312575340271, "learning_rate": 0.00014223758802744878, "loss": 1.4836, "step": 10621 }, { "epoch": 0.3803964402743209, "grad_norm": 2.281581163406372, "learning_rate": 0.0001422270741570877, "loss": 1.3742, "step": 10622 }, { "epoch": 0.38043225240388917, "grad_norm": 1.739435076713562, "learning_rate": 0.00014221655971861582, "loss": 1.5536, "step": 10623 }, { "epoch": 0.3804680645334575, "grad_norm": 2.075655937194824, "learning_rate": 0.0001422060447121746, "loss": 1.4077, "step": 10624 }, { "epoch": 0.38050387666302576, "grad_norm": 1.8676741123199463, "learning_rate": 0.0001421955291379055, "loss": 1.7129, "step": 10625 }, { "epoch": 0.38053968879259403, "grad_norm": 1.853179693222046, "learning_rate": 0.00014218501299594996, "loss": 1.369, "step": 10626 }, { "epoch": 0.38057550092216236, "grad_norm": 1.8642727136611938, "learning_rate": 0.00014217449628644947, "loss": 1.3751, "step": 10627 }, { "epoch": 0.3806113130517306, "grad_norm": 2.065969467163086, "learning_rate": 0.00014216397900954558, "loss": 1.4474, "step": 10628 }, { "epoch": 0.3806471251812989, "grad_norm": 2.014706611633301, "learning_rate": 0.00014215346116537968, "loss": 1.7028, "step": 10629 }, { "epoch": 0.38068293731086716, "grad_norm": 1.5329726934432983, "learning_rate": 0.0001421429427540934, "loss": 1.3595, "step": 10630 }, { "epoch": 0.3807187494404355, "grad_norm": 1.8910022974014282, "learning_rate": 0.00014213242377582815, "loss": 1.3986, "step": 10631 }, { "epoch": 0.38075456157000376, "grad_norm": 1.6006379127502441, "learning_rate": 0.0001421219042307255, "loss": 1.6447, "step": 10632 }, { "epoch": 0.380790373699572, "grad_norm": 1.9453397989273071, "learning_rate": 0.00014211138411892696, "loss": 1.5806, "step": 10633 }, { "epoch": 0.38082618582914035, "grad_norm": 1.7229535579681396, "learning_rate": 0.00014210086344057404, "loss": 1.6743, "step": 10634 }, { "epoch": 0.3808619979587086, "grad_norm": 1.395990252494812, "learning_rate": 0.00014209034219580833, "loss": 1.4022, "step": 10635 }, { "epoch": 0.3808978100882769, "grad_norm": 1.543500304222107, "learning_rate": 0.00014207982038477135, "loss": 1.6443, "step": 10636 }, { "epoch": 0.38093362221784516, "grad_norm": 1.8862489461898804, "learning_rate": 0.00014206929800760466, "loss": 1.8299, "step": 10637 }, { "epoch": 0.3809694343474135, "grad_norm": 1.6491189002990723, "learning_rate": 0.00014205877506444982, "loss": 1.5355, "step": 10638 }, { "epoch": 0.38100524647698175, "grad_norm": 2.0166943073272705, "learning_rate": 0.00014204825155544846, "loss": 1.2944, "step": 10639 }, { "epoch": 0.38104105860655, "grad_norm": 1.385064959526062, "learning_rate": 0.00014203772748074206, "loss": 1.5232, "step": 10640 }, { "epoch": 0.38107687073611834, "grad_norm": 1.4159349203109741, "learning_rate": 0.00014202720284047234, "loss": 1.3836, "step": 10641 }, { "epoch": 0.3811126828656866, "grad_norm": 1.7959661483764648, "learning_rate": 0.00014201667763478074, "loss": 1.7063, "step": 10642 }, { "epoch": 0.3811484949952549, "grad_norm": 1.8224889039993286, "learning_rate": 0.00014200615186380899, "loss": 1.6915, "step": 10643 }, { "epoch": 0.38118430712482315, "grad_norm": 2.0088465213775635, "learning_rate": 0.0001419956255276986, "loss": 1.6776, "step": 10644 }, { "epoch": 0.3812201192543915, "grad_norm": 1.4970310926437378, "learning_rate": 0.00014198509862659129, "loss": 1.4258, "step": 10645 }, { "epoch": 0.38125593138395975, "grad_norm": 1.4173898696899414, "learning_rate": 0.00014197457116062857, "loss": 1.7229, "step": 10646 }, { "epoch": 0.381291743513528, "grad_norm": 1.68673837184906, "learning_rate": 0.0001419640431299522, "loss": 1.4912, "step": 10647 }, { "epoch": 0.38132755564309634, "grad_norm": 2.059001922607422, "learning_rate": 0.00014195351453470374, "loss": 1.328, "step": 10648 }, { "epoch": 0.3813633677726646, "grad_norm": 1.7006314992904663, "learning_rate": 0.00014194298537502487, "loss": 1.4513, "step": 10649 }, { "epoch": 0.3813991799022329, "grad_norm": 1.4936386346817017, "learning_rate": 0.00014193245565105722, "loss": 1.6796, "step": 10650 }, { "epoch": 0.38143499203180115, "grad_norm": 1.7479816675186157, "learning_rate": 0.00014192192536294245, "loss": 1.4616, "step": 10651 }, { "epoch": 0.38147080416136947, "grad_norm": 1.7160698175430298, "learning_rate": 0.00014191139451082228, "loss": 1.6427, "step": 10652 }, { "epoch": 0.38150661629093774, "grad_norm": 1.7994788885116577, "learning_rate": 0.00014190086309483834, "loss": 1.4788, "step": 10653 }, { "epoch": 0.381542428420506, "grad_norm": 1.9248762130737305, "learning_rate": 0.00014189033111513234, "loss": 1.6344, "step": 10654 }, { "epoch": 0.38157824055007433, "grad_norm": 2.2364611625671387, "learning_rate": 0.00014187979857184597, "loss": 1.5578, "step": 10655 }, { "epoch": 0.3816140526796426, "grad_norm": 1.8766387701034546, "learning_rate": 0.00014186926546512095, "loss": 1.7108, "step": 10656 }, { "epoch": 0.38164986480921087, "grad_norm": 2.0187058448791504, "learning_rate": 0.00014185873179509893, "loss": 1.3808, "step": 10657 }, { "epoch": 0.38168567693877914, "grad_norm": 1.377492070198059, "learning_rate": 0.00014184819756192168, "loss": 1.2254, "step": 10658 }, { "epoch": 0.38172148906834746, "grad_norm": 1.88951575756073, "learning_rate": 0.00014183766276573096, "loss": 1.5138, "step": 10659 }, { "epoch": 0.38175730119791573, "grad_norm": 2.606764554977417, "learning_rate": 0.00014182712740666838, "loss": 1.9735, "step": 10660 }, { "epoch": 0.381793113327484, "grad_norm": 2.150054693222046, "learning_rate": 0.00014181659148487582, "loss": 1.38, "step": 10661 }, { "epoch": 0.3818289254570523, "grad_norm": 1.516271948814392, "learning_rate": 0.00014180605500049493, "loss": 1.7134, "step": 10662 }, { "epoch": 0.3818647375866206, "grad_norm": 1.3602635860443115, "learning_rate": 0.0001417955179536675, "loss": 1.4687, "step": 10663 }, { "epoch": 0.38190054971618886, "grad_norm": 1.6596342325210571, "learning_rate": 0.00014178498034453528, "loss": 1.5292, "step": 10664 }, { "epoch": 0.38193636184575713, "grad_norm": 1.9334921836853027, "learning_rate": 0.00014177444217324005, "loss": 1.3077, "step": 10665 }, { "epoch": 0.38197217397532546, "grad_norm": 1.7454739809036255, "learning_rate": 0.00014176390343992358, "loss": 1.722, "step": 10666 }, { "epoch": 0.3820079861048937, "grad_norm": 1.3706307411193848, "learning_rate": 0.0001417533641447277, "loss": 1.488, "step": 10667 }, { "epoch": 0.382043798234462, "grad_norm": 1.5092109441757202, "learning_rate": 0.00014174282428779412, "loss": 1.7905, "step": 10668 }, { "epoch": 0.3820796103640303, "grad_norm": 1.692136526107788, "learning_rate": 0.0001417322838692647, "loss": 1.2234, "step": 10669 }, { "epoch": 0.3821154224935986, "grad_norm": 1.8451822996139526, "learning_rate": 0.00014172174288928124, "loss": 1.5694, "step": 10670 }, { "epoch": 0.38215123462316686, "grad_norm": 1.4315675497055054, "learning_rate": 0.00014171120134798552, "loss": 1.4778, "step": 10671 }, { "epoch": 0.3821870467527351, "grad_norm": 1.8963932991027832, "learning_rate": 0.00014170065924551942, "loss": 1.2421, "step": 10672 }, { "epoch": 0.38222285888230345, "grad_norm": 1.7373079061508179, "learning_rate": 0.00014169011658202472, "loss": 1.2913, "step": 10673 }, { "epoch": 0.3822586710118717, "grad_norm": 1.6694564819335938, "learning_rate": 0.00014167957335764331, "loss": 1.6226, "step": 10674 }, { "epoch": 0.38229448314144, "grad_norm": 2.1219732761383057, "learning_rate": 0.00014166902957251696, "loss": 1.6144, "step": 10675 }, { "epoch": 0.3823302952710083, "grad_norm": 1.57712721824646, "learning_rate": 0.00014165848522678756, "loss": 1.1831, "step": 10676 }, { "epoch": 0.3823661074005766, "grad_norm": 1.6538848876953125, "learning_rate": 0.00014164794032059703, "loss": 1.7272, "step": 10677 }, { "epoch": 0.38240191953014485, "grad_norm": 1.840171217918396, "learning_rate": 0.00014163739485408716, "loss": 1.5288, "step": 10678 }, { "epoch": 0.3824377316597131, "grad_norm": 1.7260515689849854, "learning_rate": 0.00014162684882739984, "loss": 1.6227, "step": 10679 }, { "epoch": 0.38247354378928144, "grad_norm": 2.425084352493286, "learning_rate": 0.00014161630224067694, "loss": 1.4853, "step": 10680 }, { "epoch": 0.3825093559188497, "grad_norm": 1.8197312355041504, "learning_rate": 0.0001416057550940604, "loss": 1.797, "step": 10681 }, { "epoch": 0.382545168048418, "grad_norm": 1.4861470460891724, "learning_rate": 0.00014159520738769212, "loss": 1.5128, "step": 10682 }, { "epoch": 0.3825809801779863, "grad_norm": 1.9796706438064575, "learning_rate": 0.00014158465912171396, "loss": 1.574, "step": 10683 }, { "epoch": 0.3826167923075546, "grad_norm": 1.8207260370254517, "learning_rate": 0.00014157411029626783, "loss": 1.3684, "step": 10684 }, { "epoch": 0.38265260443712285, "grad_norm": 1.8487191200256348, "learning_rate": 0.0001415635609114957, "loss": 1.386, "step": 10685 }, { "epoch": 0.3826884165666911, "grad_norm": 1.9048634767532349, "learning_rate": 0.00014155301096753945, "loss": 1.496, "step": 10686 }, { "epoch": 0.38272422869625944, "grad_norm": 1.796779751777649, "learning_rate": 0.00014154246046454107, "loss": 1.4984, "step": 10687 }, { "epoch": 0.3827600408258277, "grad_norm": 1.5472294092178345, "learning_rate": 0.00014153190940264246, "loss": 1.6013, "step": 10688 }, { "epoch": 0.382795852955396, "grad_norm": 2.0484673976898193, "learning_rate": 0.00014152135778198557, "loss": 1.7283, "step": 10689 }, { "epoch": 0.3828316650849643, "grad_norm": 1.5459712743759155, "learning_rate": 0.00014151080560271235, "loss": 1.3491, "step": 10690 }, { "epoch": 0.38286747721453257, "grad_norm": 1.379616618156433, "learning_rate": 0.00014150025286496483, "loss": 1.3149, "step": 10691 }, { "epoch": 0.38290328934410084, "grad_norm": 2.17366886138916, "learning_rate": 0.0001414896995688849, "loss": 1.6789, "step": 10692 }, { "epoch": 0.3829391014736691, "grad_norm": 1.3768093585968018, "learning_rate": 0.00014147914571461455, "loss": 1.5071, "step": 10693 }, { "epoch": 0.38297491360323743, "grad_norm": 1.906646728515625, "learning_rate": 0.0001414685913022959, "loss": 1.6087, "step": 10694 }, { "epoch": 0.3830107257328057, "grad_norm": 2.058373212814331, "learning_rate": 0.00014145803633207077, "loss": 1.7719, "step": 10695 }, { "epoch": 0.38304653786237397, "grad_norm": 1.584075689315796, "learning_rate": 0.00014144748080408126, "loss": 1.4597, "step": 10696 }, { "epoch": 0.3830823499919423, "grad_norm": 1.2557836771011353, "learning_rate": 0.00014143692471846935, "loss": 1.2409, "step": 10697 }, { "epoch": 0.38311816212151056, "grad_norm": 1.7716764211654663, "learning_rate": 0.0001414263680753771, "loss": 1.6804, "step": 10698 }, { "epoch": 0.38315397425107883, "grad_norm": 1.3837485313415527, "learning_rate": 0.00014141581087494644, "loss": 1.5138, "step": 10699 }, { "epoch": 0.3831897863806471, "grad_norm": 1.3387659788131714, "learning_rate": 0.00014140525311731952, "loss": 1.4457, "step": 10700 }, { "epoch": 0.3832255985102154, "grad_norm": 1.4828484058380127, "learning_rate": 0.00014139469480263828, "loss": 1.6133, "step": 10701 }, { "epoch": 0.3832614106397837, "grad_norm": 2.200072765350342, "learning_rate": 0.00014138413593104486, "loss": 1.3238, "step": 10702 }, { "epoch": 0.38329722276935196, "grad_norm": 1.9859720468521118, "learning_rate": 0.0001413735765026813, "loss": 1.7688, "step": 10703 }, { "epoch": 0.3833330348989203, "grad_norm": 1.7335363626480103, "learning_rate": 0.00014136301651768957, "loss": 1.6909, "step": 10704 }, { "epoch": 0.38336884702848856, "grad_norm": 1.7980806827545166, "learning_rate": 0.00014135245597621184, "loss": 1.2562, "step": 10705 }, { "epoch": 0.3834046591580568, "grad_norm": 1.8698501586914062, "learning_rate": 0.00014134189487839013, "loss": 1.6418, "step": 10706 }, { "epoch": 0.3834404712876251, "grad_norm": 1.5134303569793701, "learning_rate": 0.0001413313332243666, "loss": 1.3264, "step": 10707 }, { "epoch": 0.3834762834171934, "grad_norm": 1.5431501865386963, "learning_rate": 0.00014132077101428324, "loss": 1.4467, "step": 10708 }, { "epoch": 0.3835120955467617, "grad_norm": 1.60693359375, "learning_rate": 0.00014131020824828224, "loss": 1.5158, "step": 10709 }, { "epoch": 0.38354790767632996, "grad_norm": 2.905691385269165, "learning_rate": 0.00014129964492650568, "loss": 1.7937, "step": 10710 }, { "epoch": 0.3835837198058983, "grad_norm": 1.4310888051986694, "learning_rate": 0.00014128908104909567, "loss": 1.286, "step": 10711 }, { "epoch": 0.38361953193546655, "grad_norm": 1.8025977611541748, "learning_rate": 0.00014127851661619432, "loss": 1.27, "step": 10712 }, { "epoch": 0.3836553440650348, "grad_norm": 1.9007517099380493, "learning_rate": 0.00014126795162794378, "loss": 1.5953, "step": 10713 }, { "epoch": 0.3836911561946031, "grad_norm": 2.110842704772949, "learning_rate": 0.00014125738608448618, "loss": 1.5767, "step": 10714 }, { "epoch": 0.3837269683241714, "grad_norm": 1.5326857566833496, "learning_rate": 0.00014124681998596366, "loss": 1.6487, "step": 10715 }, { "epoch": 0.3837627804537397, "grad_norm": 1.8157939910888672, "learning_rate": 0.0001412362533325184, "loss": 1.588, "step": 10716 }, { "epoch": 0.38379859258330795, "grad_norm": 1.4543850421905518, "learning_rate": 0.0001412256861242925, "loss": 1.5232, "step": 10717 }, { "epoch": 0.3838344047128763, "grad_norm": 1.69603431224823, "learning_rate": 0.00014121511836142823, "loss": 1.727, "step": 10718 }, { "epoch": 0.38387021684244454, "grad_norm": 1.8203978538513184, "learning_rate": 0.00014120455004406766, "loss": 1.6048, "step": 10719 }, { "epoch": 0.3839060289720128, "grad_norm": 1.70366370677948, "learning_rate": 0.00014119398117235304, "loss": 1.6988, "step": 10720 }, { "epoch": 0.3839418411015811, "grad_norm": 1.6187806129455566, "learning_rate": 0.00014118341174642653, "loss": 1.534, "step": 10721 }, { "epoch": 0.3839776532311494, "grad_norm": 1.8127055168151855, "learning_rate": 0.00014117284176643033, "loss": 1.429, "step": 10722 }, { "epoch": 0.3840134653607177, "grad_norm": 2.1849961280822754, "learning_rate": 0.00014116227123250668, "loss": 1.515, "step": 10723 }, { "epoch": 0.38404927749028595, "grad_norm": 1.5022850036621094, "learning_rate": 0.00014115170014479775, "loss": 1.6862, "step": 10724 }, { "epoch": 0.38408508961985427, "grad_norm": 1.709812879562378, "learning_rate": 0.0001411411285034458, "loss": 1.5168, "step": 10725 }, { "epoch": 0.38412090174942254, "grad_norm": 1.5337954759597778, "learning_rate": 0.000141130556308593, "loss": 1.595, "step": 10726 }, { "epoch": 0.3841567138789908, "grad_norm": 1.5661762952804565, "learning_rate": 0.00014111998356038162, "loss": 1.325, "step": 10727 }, { "epoch": 0.3841925260085591, "grad_norm": 2.0021166801452637, "learning_rate": 0.00014110941025895392, "loss": 1.3808, "step": 10728 }, { "epoch": 0.3842283381381274, "grad_norm": 1.3688938617706299, "learning_rate": 0.00014109883640445214, "loss": 1.366, "step": 10729 }, { "epoch": 0.38426415026769567, "grad_norm": 2.30067777633667, "learning_rate": 0.00014108826199701852, "loss": 1.3969, "step": 10730 }, { "epoch": 0.38429996239726394, "grad_norm": 2.1623682975769043, "learning_rate": 0.00014107768703679533, "loss": 1.4078, "step": 10731 }, { "epoch": 0.38433577452683226, "grad_norm": 1.4993696212768555, "learning_rate": 0.00014106711152392484, "loss": 1.499, "step": 10732 }, { "epoch": 0.38437158665640053, "grad_norm": 1.903861403465271, "learning_rate": 0.00014105653545854935, "loss": 1.5057, "step": 10733 }, { "epoch": 0.3844073987859688, "grad_norm": 1.6415305137634277, "learning_rate": 0.00014104595884081113, "loss": 1.5481, "step": 10734 }, { "epoch": 0.38444321091553707, "grad_norm": 1.8244068622589111, "learning_rate": 0.00014103538167085247, "loss": 1.5552, "step": 10735 }, { "epoch": 0.3844790230451054, "grad_norm": 1.8771268129348755, "learning_rate": 0.0001410248039488157, "loss": 1.4309, "step": 10736 }, { "epoch": 0.38451483517467366, "grad_norm": 1.5982030630111694, "learning_rate": 0.0001410142256748431, "loss": 1.4636, "step": 10737 }, { "epoch": 0.38455064730424193, "grad_norm": 1.3975290060043335, "learning_rate": 0.00014100364684907702, "loss": 1.3491, "step": 10738 }, { "epoch": 0.38458645943381026, "grad_norm": 1.9042540788650513, "learning_rate": 0.00014099306747165975, "loss": 1.2723, "step": 10739 }, { "epoch": 0.3846222715633785, "grad_norm": 1.3731106519699097, "learning_rate": 0.00014098248754273364, "loss": 1.757, "step": 10740 }, { "epoch": 0.3846580836929468, "grad_norm": 2.2348291873931885, "learning_rate": 0.000140971907062441, "loss": 1.2937, "step": 10741 }, { "epoch": 0.38469389582251506, "grad_norm": 1.5206588506698608, "learning_rate": 0.0001409613260309242, "loss": 1.3901, "step": 10742 }, { "epoch": 0.3847297079520834, "grad_norm": 1.705182671546936, "learning_rate": 0.00014095074444832561, "loss": 1.5864, "step": 10743 }, { "epoch": 0.38476552008165166, "grad_norm": 1.3100666999816895, "learning_rate": 0.0001409401623147876, "loss": 1.5685, "step": 10744 }, { "epoch": 0.3848013322112199, "grad_norm": 2.2978193759918213, "learning_rate": 0.00014092957963045245, "loss": 1.895, "step": 10745 }, { "epoch": 0.38483714434078825, "grad_norm": 1.4124058485031128, "learning_rate": 0.00014091899639546263, "loss": 1.0143, "step": 10746 }, { "epoch": 0.3848729564703565, "grad_norm": 1.691389799118042, "learning_rate": 0.00014090841260996055, "loss": 1.596, "step": 10747 }, { "epoch": 0.3849087685999248, "grad_norm": 1.6830295324325562, "learning_rate": 0.0001408978282740885, "loss": 1.3329, "step": 10748 }, { "epoch": 0.38494458072949306, "grad_norm": 1.5190984010696411, "learning_rate": 0.0001408872433879889, "loss": 1.4487, "step": 10749 }, { "epoch": 0.3849803928590614, "grad_norm": 1.6178358793258667, "learning_rate": 0.00014087665795180422, "loss": 1.4554, "step": 10750 }, { "epoch": 0.38501620498862965, "grad_norm": 1.344931721687317, "learning_rate": 0.00014086607196567682, "loss": 1.5279, "step": 10751 }, { "epoch": 0.3850520171181979, "grad_norm": 1.3446136713027954, "learning_rate": 0.00014085548542974914, "loss": 1.4228, "step": 10752 }, { "epoch": 0.38508782924776624, "grad_norm": 1.3284231424331665, "learning_rate": 0.0001408448983441636, "loss": 1.2936, "step": 10753 }, { "epoch": 0.3851236413773345, "grad_norm": 1.6570699214935303, "learning_rate": 0.00014083431070906262, "loss": 1.6028, "step": 10754 }, { "epoch": 0.3851594535069028, "grad_norm": 1.6138721704483032, "learning_rate": 0.0001408237225245887, "loss": 1.4631, "step": 10755 }, { "epoch": 0.38519526563647105, "grad_norm": 1.7377655506134033, "learning_rate": 0.00014081313379088424, "loss": 1.5583, "step": 10756 }, { "epoch": 0.3852310777660394, "grad_norm": 1.4762719869613647, "learning_rate": 0.0001408025445080917, "loss": 1.4366, "step": 10757 }, { "epoch": 0.38526688989560764, "grad_norm": 1.4237333536148071, "learning_rate": 0.00014079195467635354, "loss": 1.3627, "step": 10758 }, { "epoch": 0.3853027020251759, "grad_norm": 1.8359794616699219, "learning_rate": 0.00014078136429581227, "loss": 1.5827, "step": 10759 }, { "epoch": 0.38533851415474424, "grad_norm": 1.5892670154571533, "learning_rate": 0.00014077077336661036, "loss": 1.2771, "step": 10760 }, { "epoch": 0.3853743262843125, "grad_norm": 2.054046392440796, "learning_rate": 0.00014076018188889026, "loss": 1.5361, "step": 10761 }, { "epoch": 0.3854101384138808, "grad_norm": 1.899831771850586, "learning_rate": 0.0001407495898627945, "loss": 1.6762, "step": 10762 }, { "epoch": 0.38544595054344905, "grad_norm": 1.9101499319076538, "learning_rate": 0.00014073899728846555, "loss": 1.5444, "step": 10763 }, { "epoch": 0.38548176267301737, "grad_norm": 1.364566445350647, "learning_rate": 0.00014072840416604597, "loss": 1.4441, "step": 10764 }, { "epoch": 0.38551757480258564, "grad_norm": 1.467511773109436, "learning_rate": 0.00014071781049567825, "loss": 1.5548, "step": 10765 }, { "epoch": 0.3855533869321539, "grad_norm": 1.9000247716903687, "learning_rate": 0.0001407072162775049, "loss": 1.6866, "step": 10766 }, { "epoch": 0.38558919906172223, "grad_norm": 1.3294975757598877, "learning_rate": 0.00014069662151166846, "loss": 1.6307, "step": 10767 }, { "epoch": 0.3856250111912905, "grad_norm": 1.628848910331726, "learning_rate": 0.00014068602619831148, "loss": 1.4484, "step": 10768 }, { "epoch": 0.38566082332085877, "grad_norm": 1.6399376392364502, "learning_rate": 0.0001406754303375765, "loss": 1.7626, "step": 10769 }, { "epoch": 0.38569663545042704, "grad_norm": 1.6069952249526978, "learning_rate": 0.00014066483392960604, "loss": 1.5228, "step": 10770 }, { "epoch": 0.38573244757999536, "grad_norm": 2.2001421451568604, "learning_rate": 0.00014065423697454273, "loss": 1.5175, "step": 10771 }, { "epoch": 0.38576825970956363, "grad_norm": 1.3679170608520508, "learning_rate": 0.0001406436394725291, "loss": 1.4571, "step": 10772 }, { "epoch": 0.3858040718391319, "grad_norm": 2.004768133163452, "learning_rate": 0.00014063304142370773, "loss": 1.1387, "step": 10773 }, { "epoch": 0.3858398839687002, "grad_norm": 2.107898235321045, "learning_rate": 0.0001406224428282212, "loss": 1.7398, "step": 10774 }, { "epoch": 0.3858756960982685, "grad_norm": 1.65401029586792, "learning_rate": 0.0001406118436862121, "loss": 1.4425, "step": 10775 }, { "epoch": 0.38591150822783676, "grad_norm": 2.257986307144165, "learning_rate": 0.000140601243997823, "loss": 1.7269, "step": 10776 }, { "epoch": 0.38594732035740503, "grad_norm": 1.4186952114105225, "learning_rate": 0.00014059064376319657, "loss": 1.647, "step": 10777 }, { "epoch": 0.38598313248697336, "grad_norm": 1.5061075687408447, "learning_rate": 0.00014058004298247537, "loss": 1.4102, "step": 10778 }, { "epoch": 0.3860189446165416, "grad_norm": 2.0114200115203857, "learning_rate": 0.00014056944165580202, "loss": 1.763, "step": 10779 }, { "epoch": 0.3860547567461099, "grad_norm": 1.347678780555725, "learning_rate": 0.00014055883978331916, "loss": 1.5323, "step": 10780 }, { "epoch": 0.3860905688756782, "grad_norm": 1.6494736671447754, "learning_rate": 0.00014054823736516945, "loss": 1.7314, "step": 10781 }, { "epoch": 0.3861263810052465, "grad_norm": 1.7377164363861084, "learning_rate": 0.00014053763440149552, "loss": 1.4244, "step": 10782 }, { "epoch": 0.38616219313481476, "grad_norm": 2.1563901901245117, "learning_rate": 0.00014052703089244, "loss": 1.5343, "step": 10783 }, { "epoch": 0.386198005264383, "grad_norm": 1.365647315979004, "learning_rate": 0.00014051642683814557, "loss": 1.491, "step": 10784 }, { "epoch": 0.38623381739395135, "grad_norm": 1.7293301820755005, "learning_rate": 0.00014050582223875484, "loss": 1.5437, "step": 10785 }, { "epoch": 0.3862696295235196, "grad_norm": 2.316777229309082, "learning_rate": 0.00014049521709441057, "loss": 1.612, "step": 10786 }, { "epoch": 0.3863054416530879, "grad_norm": 1.4455130100250244, "learning_rate": 0.00014048461140525533, "loss": 1.5892, "step": 10787 }, { "epoch": 0.3863412537826562, "grad_norm": 1.716500997543335, "learning_rate": 0.0001404740051714319, "loss": 1.5128, "step": 10788 }, { "epoch": 0.3863770659122245, "grad_norm": 1.4911115169525146, "learning_rate": 0.00014046339839308294, "loss": 1.6088, "step": 10789 }, { "epoch": 0.38641287804179275, "grad_norm": 2.029979944229126, "learning_rate": 0.00014045279107035116, "loss": 1.488, "step": 10790 }, { "epoch": 0.386448690171361, "grad_norm": 1.7694584131240845, "learning_rate": 0.00014044218320337923, "loss": 1.5967, "step": 10791 }, { "epoch": 0.38648450230092934, "grad_norm": 1.7229918241500854, "learning_rate": 0.00014043157479230988, "loss": 1.4873, "step": 10792 }, { "epoch": 0.3865203144304976, "grad_norm": 1.647547721862793, "learning_rate": 0.00014042096583728587, "loss": 1.496, "step": 10793 }, { "epoch": 0.3865561265600659, "grad_norm": 1.3134502172470093, "learning_rate": 0.0001404103563384499, "loss": 1.3876, "step": 10794 }, { "epoch": 0.38659193868963415, "grad_norm": 1.5994287729263306, "learning_rate": 0.00014039974629594473, "loss": 1.7171, "step": 10795 }, { "epoch": 0.3866277508192025, "grad_norm": 1.576795220375061, "learning_rate": 0.00014038913570991302, "loss": 1.3701, "step": 10796 }, { "epoch": 0.38666356294877074, "grad_norm": 1.9416791200637817, "learning_rate": 0.00014037852458049764, "loss": 1.4239, "step": 10797 }, { "epoch": 0.386699375078339, "grad_norm": 1.8116300106048584, "learning_rate": 0.0001403679129078413, "loss": 1.5017, "step": 10798 }, { "epoch": 0.38673518720790734, "grad_norm": 1.8027644157409668, "learning_rate": 0.00014035730069208676, "loss": 1.4818, "step": 10799 }, { "epoch": 0.3867709993374756, "grad_norm": 1.8859972953796387, "learning_rate": 0.0001403466879333768, "loss": 1.5649, "step": 10800 }, { "epoch": 0.3868068114670439, "grad_norm": 1.517880916595459, "learning_rate": 0.00014033607463185416, "loss": 1.2432, "step": 10801 }, { "epoch": 0.38684262359661215, "grad_norm": 1.304983377456665, "learning_rate": 0.0001403254607876617, "loss": 1.5928, "step": 10802 }, { "epoch": 0.38687843572618047, "grad_norm": 2.295943021774292, "learning_rate": 0.00014031484640094217, "loss": 1.9763, "step": 10803 }, { "epoch": 0.38691424785574874, "grad_norm": 1.5124098062515259, "learning_rate": 0.0001403042314718384, "loss": 1.5043, "step": 10804 }, { "epoch": 0.386950059985317, "grad_norm": 1.4850661754608154, "learning_rate": 0.00014029361600049315, "loss": 1.3102, "step": 10805 }, { "epoch": 0.38698587211488533, "grad_norm": 1.8223068714141846, "learning_rate": 0.0001402829999870493, "loss": 1.2721, "step": 10806 }, { "epoch": 0.3870216842444536, "grad_norm": 1.4435514211654663, "learning_rate": 0.00014027238343164965, "loss": 1.4753, "step": 10807 }, { "epoch": 0.38705749637402187, "grad_norm": 1.5776134729385376, "learning_rate": 0.000140261766334437, "loss": 1.7435, "step": 10808 }, { "epoch": 0.38709330850359014, "grad_norm": 1.5881763696670532, "learning_rate": 0.00014025114869555425, "loss": 1.7955, "step": 10809 }, { "epoch": 0.38712912063315846, "grad_norm": 1.520105242729187, "learning_rate": 0.00014024053051514418, "loss": 1.7156, "step": 10810 }, { "epoch": 0.38716493276272673, "grad_norm": 2.3292229175567627, "learning_rate": 0.00014022991179334971, "loss": 1.5198, "step": 10811 }, { "epoch": 0.387200744892295, "grad_norm": 1.6294552087783813, "learning_rate": 0.00014021929253031366, "loss": 1.3763, "step": 10812 }, { "epoch": 0.3872365570218633, "grad_norm": 1.838181972503662, "learning_rate": 0.0001402086727261789, "loss": 1.7643, "step": 10813 }, { "epoch": 0.3872723691514316, "grad_norm": 3.064434051513672, "learning_rate": 0.0001401980523810883, "loss": 1.6755, "step": 10814 }, { "epoch": 0.38730818128099986, "grad_norm": 1.7557802200317383, "learning_rate": 0.0001401874314951848, "loss": 1.4726, "step": 10815 }, { "epoch": 0.38734399341056813, "grad_norm": 1.710260033607483, "learning_rate": 0.0001401768100686112, "loss": 1.6089, "step": 10816 }, { "epoch": 0.38737980554013646, "grad_norm": 1.567458152770996, "learning_rate": 0.00014016618810151047, "loss": 1.5403, "step": 10817 }, { "epoch": 0.3874156176697047, "grad_norm": 1.4473252296447754, "learning_rate": 0.00014015556559402551, "loss": 1.2182, "step": 10818 }, { "epoch": 0.387451429799273, "grad_norm": 1.6443681716918945, "learning_rate": 0.0001401449425462992, "loss": 1.5927, "step": 10819 }, { "epoch": 0.3874872419288413, "grad_norm": 1.7293095588684082, "learning_rate": 0.00014013431895847447, "loss": 1.4499, "step": 10820 }, { "epoch": 0.3875230540584096, "grad_norm": 1.4922728538513184, "learning_rate": 0.0001401236948306942, "loss": 1.5064, "step": 10821 }, { "epoch": 0.38755886618797786, "grad_norm": 1.9737058877944946, "learning_rate": 0.00014011307016310144, "loss": 1.3372, "step": 10822 }, { "epoch": 0.3875946783175461, "grad_norm": 3.1489598751068115, "learning_rate": 0.00014010244495583901, "loss": 1.6959, "step": 10823 }, { "epoch": 0.38763049044711445, "grad_norm": 2.1126914024353027, "learning_rate": 0.00014009181920904995, "loss": 1.5703, "step": 10824 }, { "epoch": 0.3876663025766827, "grad_norm": 2.1052439212799072, "learning_rate": 0.00014008119292287715, "loss": 1.8133, "step": 10825 }, { "epoch": 0.387702114706251, "grad_norm": 1.6701992750167847, "learning_rate": 0.00014007056609746362, "loss": 1.4596, "step": 10826 }, { "epoch": 0.3877379268358193, "grad_norm": 1.8624612092971802, "learning_rate": 0.00014005993873295234, "loss": 1.4561, "step": 10827 }, { "epoch": 0.3877737389653876, "grad_norm": 1.4672082662582397, "learning_rate": 0.0001400493108294862, "loss": 1.5657, "step": 10828 }, { "epoch": 0.38780955109495585, "grad_norm": 2.454993486404419, "learning_rate": 0.00014003868238720828, "loss": 1.7523, "step": 10829 }, { "epoch": 0.3878453632245241, "grad_norm": 2.431044578552246, "learning_rate": 0.0001400280534062615, "loss": 1.798, "step": 10830 }, { "epoch": 0.38788117535409244, "grad_norm": 1.4329546689987183, "learning_rate": 0.0001400174238867889, "loss": 1.3742, "step": 10831 }, { "epoch": 0.3879169874836607, "grad_norm": 1.5714000463485718, "learning_rate": 0.00014000679382893352, "loss": 1.4247, "step": 10832 }, { "epoch": 0.387952799613229, "grad_norm": 1.4519602060317993, "learning_rate": 0.0001399961632328383, "loss": 1.6319, "step": 10833 }, { "epoch": 0.3879886117427973, "grad_norm": 1.8219505548477173, "learning_rate": 0.00013998553209864628, "loss": 1.6325, "step": 10834 }, { "epoch": 0.3880244238723656, "grad_norm": 1.3136101961135864, "learning_rate": 0.00013997490042650054, "loss": 1.2345, "step": 10835 }, { "epoch": 0.38806023600193384, "grad_norm": 2.3637969493865967, "learning_rate": 0.00013996426821654407, "loss": 1.588, "step": 10836 }, { "epoch": 0.3880960481315021, "grad_norm": 1.519654631614685, "learning_rate": 0.00013995363546891992, "loss": 1.6943, "step": 10837 }, { "epoch": 0.38813186026107044, "grad_norm": 1.674434781074524, "learning_rate": 0.00013994300218377113, "loss": 1.9109, "step": 10838 }, { "epoch": 0.3881676723906387, "grad_norm": 1.8324869871139526, "learning_rate": 0.0001399323683612408, "loss": 1.5401, "step": 10839 }, { "epoch": 0.388203484520207, "grad_norm": 1.7333762645721436, "learning_rate": 0.00013992173400147193, "loss": 1.3558, "step": 10840 }, { "epoch": 0.3882392966497753, "grad_norm": 1.3128198385238647, "learning_rate": 0.00013991109910460763, "loss": 1.4118, "step": 10841 }, { "epoch": 0.38827510877934357, "grad_norm": 1.262176752090454, "learning_rate": 0.00013990046367079098, "loss": 1.5413, "step": 10842 }, { "epoch": 0.38831092090891184, "grad_norm": 1.3987606763839722, "learning_rate": 0.00013988982770016505, "loss": 1.4809, "step": 10843 }, { "epoch": 0.3883467330384801, "grad_norm": 1.460222601890564, "learning_rate": 0.00013987919119287296, "loss": 1.6431, "step": 10844 }, { "epoch": 0.38838254516804843, "grad_norm": 1.888704776763916, "learning_rate": 0.00013986855414905777, "loss": 1.3711, "step": 10845 }, { "epoch": 0.3884183572976167, "grad_norm": 1.4014217853546143, "learning_rate": 0.00013985791656886262, "loss": 1.457, "step": 10846 }, { "epoch": 0.38845416942718497, "grad_norm": 2.005265951156616, "learning_rate": 0.00013984727845243062, "loss": 1.5159, "step": 10847 }, { "epoch": 0.3884899815567533, "grad_norm": 1.5637860298156738, "learning_rate": 0.00013983663979990488, "loss": 1.4922, "step": 10848 }, { "epoch": 0.38852579368632156, "grad_norm": 2.402989387512207, "learning_rate": 0.00013982600061142854, "loss": 1.4786, "step": 10849 }, { "epoch": 0.38856160581588983, "grad_norm": 1.529707670211792, "learning_rate": 0.00013981536088714474, "loss": 1.4036, "step": 10850 }, { "epoch": 0.3885974179454581, "grad_norm": 1.7621464729309082, "learning_rate": 0.0001398047206271966, "loss": 1.474, "step": 10851 }, { "epoch": 0.3886332300750264, "grad_norm": 1.6757811307907104, "learning_rate": 0.00013979407983172733, "loss": 1.3865, "step": 10852 }, { "epoch": 0.3886690422045947, "grad_norm": 1.592047929763794, "learning_rate": 0.00013978343850088002, "loss": 1.3851, "step": 10853 }, { "epoch": 0.38870485433416296, "grad_norm": 1.6998521089553833, "learning_rate": 0.00013977279663479784, "loss": 1.3954, "step": 10854 }, { "epoch": 0.3887406664637313, "grad_norm": 1.5489436388015747, "learning_rate": 0.000139762154233624, "loss": 1.4166, "step": 10855 }, { "epoch": 0.38877647859329956, "grad_norm": 1.9484127759933472, "learning_rate": 0.00013975151129750168, "loss": 1.6642, "step": 10856 }, { "epoch": 0.3888122907228678, "grad_norm": 1.5500437021255493, "learning_rate": 0.00013974086782657404, "loss": 1.6083, "step": 10857 }, { "epoch": 0.3888481028524361, "grad_norm": 1.78920316696167, "learning_rate": 0.00013973022382098428, "loss": 1.2267, "step": 10858 }, { "epoch": 0.3888839149820044, "grad_norm": 1.2382129430770874, "learning_rate": 0.0001397195792808756, "loss": 1.5163, "step": 10859 }, { "epoch": 0.3889197271115727, "grad_norm": 1.68257737159729, "learning_rate": 0.00013970893420639123, "loss": 1.6837, "step": 10860 }, { "epoch": 0.38895553924114096, "grad_norm": 1.7023179531097412, "learning_rate": 0.00013969828859767438, "loss": 1.3082, "step": 10861 }, { "epoch": 0.3889913513707093, "grad_norm": 1.9222726821899414, "learning_rate": 0.00013968764245486824, "loss": 1.2721, "step": 10862 }, { "epoch": 0.38902716350027755, "grad_norm": 1.7391574382781982, "learning_rate": 0.0001396769957781161, "loss": 1.5351, "step": 10863 }, { "epoch": 0.3890629756298458, "grad_norm": 1.9035283327102661, "learning_rate": 0.00013966634856756114, "loss": 1.8143, "step": 10864 }, { "epoch": 0.3890987877594141, "grad_norm": 1.817996621131897, "learning_rate": 0.0001396557008233466, "loss": 1.4709, "step": 10865 }, { "epoch": 0.3891345998889824, "grad_norm": 1.6776641607284546, "learning_rate": 0.0001396450525456158, "loss": 1.8253, "step": 10866 }, { "epoch": 0.3891704120185507, "grad_norm": 1.5271328687667847, "learning_rate": 0.0001396344037345119, "loss": 1.4313, "step": 10867 }, { "epoch": 0.38920622414811895, "grad_norm": 2.033863067626953, "learning_rate": 0.0001396237543901783, "loss": 1.3897, "step": 10868 }, { "epoch": 0.3892420362776873, "grad_norm": 2.935681104660034, "learning_rate": 0.00013961310451275814, "loss": 1.6515, "step": 10869 }, { "epoch": 0.38927784840725554, "grad_norm": 2.0245561599731445, "learning_rate": 0.00013960245410239478, "loss": 1.5185, "step": 10870 }, { "epoch": 0.3893136605368238, "grad_norm": 1.5656148195266724, "learning_rate": 0.00013959180315923148, "loss": 1.2929, "step": 10871 }, { "epoch": 0.3893494726663921, "grad_norm": 2.131190538406372, "learning_rate": 0.00013958115168341155, "loss": 1.2898, "step": 10872 }, { "epoch": 0.3893852847959604, "grad_norm": 1.8734376430511475, "learning_rate": 0.00013957049967507824, "loss": 1.292, "step": 10873 }, { "epoch": 0.3894210969255287, "grad_norm": 1.5268676280975342, "learning_rate": 0.00013955984713437492, "loss": 1.6088, "step": 10874 }, { "epoch": 0.38945690905509694, "grad_norm": 2.3420557975769043, "learning_rate": 0.00013954919406144488, "loss": 1.5742, "step": 10875 }, { "epoch": 0.38949272118466527, "grad_norm": 2.152472496032715, "learning_rate": 0.00013953854045643146, "loss": 1.7288, "step": 10876 }, { "epoch": 0.38952853331423354, "grad_norm": 1.4512126445770264, "learning_rate": 0.00013952788631947798, "loss": 1.5296, "step": 10877 }, { "epoch": 0.3895643454438018, "grad_norm": 1.953406810760498, "learning_rate": 0.00013951723165072776, "loss": 1.575, "step": 10878 }, { "epoch": 0.3896001575733701, "grad_norm": 1.3942762613296509, "learning_rate": 0.00013950657645032418, "loss": 1.5451, "step": 10879 }, { "epoch": 0.3896359697029384, "grad_norm": 2.523400068283081, "learning_rate": 0.0001394959207184106, "loss": 1.6235, "step": 10880 }, { "epoch": 0.38967178183250667, "grad_norm": 1.6096271276474, "learning_rate": 0.00013948526445513033, "loss": 1.6995, "step": 10881 }, { "epoch": 0.38970759396207494, "grad_norm": 2.4701058864593506, "learning_rate": 0.00013947460766062673, "loss": 1.5205, "step": 10882 }, { "epoch": 0.38974340609164326, "grad_norm": 1.6288639307022095, "learning_rate": 0.00013946395033504323, "loss": 1.3126, "step": 10883 }, { "epoch": 0.38977921822121153, "grad_norm": 1.5390440225601196, "learning_rate": 0.00013945329247852317, "loss": 1.6212, "step": 10884 }, { "epoch": 0.3898150303507798, "grad_norm": 2.1103153228759766, "learning_rate": 0.00013944263409120997, "loss": 1.2683, "step": 10885 }, { "epoch": 0.38985084248034807, "grad_norm": 1.4607430696487427, "learning_rate": 0.00013943197517324698, "loss": 1.768, "step": 10886 }, { "epoch": 0.3898866546099164, "grad_norm": 1.542883276939392, "learning_rate": 0.00013942131572477763, "loss": 1.5286, "step": 10887 }, { "epoch": 0.38992246673948466, "grad_norm": 1.6651703119277954, "learning_rate": 0.00013941065574594536, "loss": 1.5245, "step": 10888 }, { "epoch": 0.38995827886905293, "grad_norm": 2.000406503677368, "learning_rate": 0.0001393999952368935, "loss": 1.3287, "step": 10889 }, { "epoch": 0.38999409099862126, "grad_norm": 1.859923243522644, "learning_rate": 0.0001393893341977656, "loss": 1.5322, "step": 10890 }, { "epoch": 0.3900299031281895, "grad_norm": 1.680471658706665, "learning_rate": 0.00013937867262870494, "loss": 1.2547, "step": 10891 }, { "epoch": 0.3900657152577578, "grad_norm": 1.6399215459823608, "learning_rate": 0.00013936801052985508, "loss": 1.71, "step": 10892 }, { "epoch": 0.39010152738732606, "grad_norm": 1.9132766723632812, "learning_rate": 0.0001393573479013594, "loss": 1.5958, "step": 10893 }, { "epoch": 0.3901373395168944, "grad_norm": 1.6897841691970825, "learning_rate": 0.00013934668474336137, "loss": 1.7573, "step": 10894 }, { "epoch": 0.39017315164646266, "grad_norm": 1.609229564666748, "learning_rate": 0.00013933602105600446, "loss": 1.3524, "step": 10895 }, { "epoch": 0.3902089637760309, "grad_norm": 1.546349287033081, "learning_rate": 0.00013932535683943212, "loss": 1.3228, "step": 10896 }, { "epoch": 0.39024477590559925, "grad_norm": 1.713444709777832, "learning_rate": 0.00013931469209378788, "loss": 1.5714, "step": 10897 }, { "epoch": 0.3902805880351675, "grad_norm": 1.8013064861297607, "learning_rate": 0.0001393040268192151, "loss": 1.5539, "step": 10898 }, { "epoch": 0.3903164001647358, "grad_norm": 1.501839280128479, "learning_rate": 0.00013929336101585737, "loss": 1.5056, "step": 10899 }, { "epoch": 0.39035221229430406, "grad_norm": 1.4470911026000977, "learning_rate": 0.00013928269468385814, "loss": 1.3623, "step": 10900 }, { "epoch": 0.3903880244238724, "grad_norm": 1.782845377922058, "learning_rate": 0.00013927202782336093, "loss": 1.5582, "step": 10901 }, { "epoch": 0.39042383655344065, "grad_norm": 1.5116702318191528, "learning_rate": 0.0001392613604345092, "loss": 1.2386, "step": 10902 }, { "epoch": 0.3904596486830089, "grad_norm": 1.541406512260437, "learning_rate": 0.00013925069251744657, "loss": 1.5252, "step": 10903 }, { "epoch": 0.39049546081257724, "grad_norm": 1.852392315864563, "learning_rate": 0.0001392400240723165, "loss": 1.5734, "step": 10904 }, { "epoch": 0.3905312729421455, "grad_norm": 1.53135085105896, "learning_rate": 0.00013922935509926249, "loss": 1.5326, "step": 10905 }, { "epoch": 0.3905670850717138, "grad_norm": 1.9085524082183838, "learning_rate": 0.00013921868559842813, "loss": 1.7359, "step": 10906 }, { "epoch": 0.39060289720128205, "grad_norm": 1.9585771560668945, "learning_rate": 0.00013920801556995693, "loss": 1.4947, "step": 10907 }, { "epoch": 0.3906387093308504, "grad_norm": 2.3244411945343018, "learning_rate": 0.00013919734501399248, "loss": 1.4937, "step": 10908 }, { "epoch": 0.39067452146041864, "grad_norm": 2.101654052734375, "learning_rate": 0.0001391866739306783, "loss": 1.5937, "step": 10909 }, { "epoch": 0.3907103335899869, "grad_norm": 1.9944405555725098, "learning_rate": 0.00013917600232015798, "loss": 1.5416, "step": 10910 }, { "epoch": 0.39074614571955524, "grad_norm": 1.5313005447387695, "learning_rate": 0.00013916533018257506, "loss": 1.5824, "step": 10911 }, { "epoch": 0.3907819578491235, "grad_norm": 1.490833044052124, "learning_rate": 0.00013915465751807314, "loss": 1.5661, "step": 10912 }, { "epoch": 0.3908177699786918, "grad_norm": 1.355104923248291, "learning_rate": 0.00013914398432679582, "loss": 1.498, "step": 10913 }, { "epoch": 0.39085358210826004, "grad_norm": 1.4459413290023804, "learning_rate": 0.00013913331060888667, "loss": 1.0748, "step": 10914 }, { "epoch": 0.39088939423782837, "grad_norm": 1.934935212135315, "learning_rate": 0.00013912263636448936, "loss": 1.3628, "step": 10915 }, { "epoch": 0.39092520636739664, "grad_norm": 1.31533944606781, "learning_rate": 0.00013911196159374737, "loss": 1.476, "step": 10916 }, { "epoch": 0.3909610184969649, "grad_norm": 1.765876293182373, "learning_rate": 0.00013910128629680441, "loss": 1.4795, "step": 10917 }, { "epoch": 0.39099683062653323, "grad_norm": 1.559696912765503, "learning_rate": 0.0001390906104738041, "loss": 1.7247, "step": 10918 }, { "epoch": 0.3910326427561015, "grad_norm": 1.7019007205963135, "learning_rate": 0.00013907993412489003, "loss": 1.3482, "step": 10919 }, { "epoch": 0.39106845488566977, "grad_norm": 1.4443830251693726, "learning_rate": 0.00013906925725020586, "loss": 1.5142, "step": 10920 }, { "epoch": 0.39110426701523804, "grad_norm": 1.9812666177749634, "learning_rate": 0.00013905857984989524, "loss": 1.4914, "step": 10921 }, { "epoch": 0.39114007914480636, "grad_norm": 1.4861475229263306, "learning_rate": 0.00013904790192410178, "loss": 1.4392, "step": 10922 }, { "epoch": 0.39117589127437463, "grad_norm": 1.292574167251587, "learning_rate": 0.0001390372234729692, "loss": 1.2898, "step": 10923 }, { "epoch": 0.3912117034039429, "grad_norm": 1.654436469078064, "learning_rate": 0.00013902654449664115, "loss": 1.4982, "step": 10924 }, { "epoch": 0.3912475155335112, "grad_norm": 1.3484128713607788, "learning_rate": 0.00013901586499526125, "loss": 1.6477, "step": 10925 }, { "epoch": 0.3912833276630795, "grad_norm": 1.577406883239746, "learning_rate": 0.0001390051849689732, "loss": 1.5074, "step": 10926 }, { "epoch": 0.39131913979264776, "grad_norm": 1.7564611434936523, "learning_rate": 0.00013899450441792074, "loss": 0.9454, "step": 10927 }, { "epoch": 0.39135495192221603, "grad_norm": 1.9713129997253418, "learning_rate": 0.00013898382334224748, "loss": 1.3936, "step": 10928 }, { "epoch": 0.39139076405178436, "grad_norm": 1.8760312795639038, "learning_rate": 0.0001389731417420972, "loss": 1.5234, "step": 10929 }, { "epoch": 0.3914265761813526, "grad_norm": 1.71038019657135, "learning_rate": 0.00013896245961761354, "loss": 1.4217, "step": 10930 }, { "epoch": 0.3914623883109209, "grad_norm": 1.5973619222640991, "learning_rate": 0.00013895177696894023, "loss": 1.3573, "step": 10931 }, { "epoch": 0.3914982004404892, "grad_norm": 1.6818480491638184, "learning_rate": 0.00013894109379622104, "loss": 1.3301, "step": 10932 }, { "epoch": 0.3915340125700575, "grad_norm": 1.7403838634490967, "learning_rate": 0.00013893041009959968, "loss": 1.5322, "step": 10933 }, { "epoch": 0.39156982469962576, "grad_norm": 1.5131548643112183, "learning_rate": 0.00013891972587921987, "loss": 1.6662, "step": 10934 }, { "epoch": 0.391605636829194, "grad_norm": 1.3520056009292603, "learning_rate": 0.0001389090411352253, "loss": 1.5041, "step": 10935 }, { "epoch": 0.39164144895876235, "grad_norm": 1.8540492057800293, "learning_rate": 0.0001388983558677598, "loss": 1.3033, "step": 10936 }, { "epoch": 0.3916772610883306, "grad_norm": 2.1093995571136475, "learning_rate": 0.00013888767007696709, "loss": 1.2123, "step": 10937 }, { "epoch": 0.3917130732178989, "grad_norm": 1.8228912353515625, "learning_rate": 0.00013887698376299095, "loss": 1.5145, "step": 10938 }, { "epoch": 0.3917488853474672, "grad_norm": 2.670003652572632, "learning_rate": 0.00013886629692597512, "loss": 1.4614, "step": 10939 }, { "epoch": 0.3917846974770355, "grad_norm": 1.368762731552124, "learning_rate": 0.00013885560956606344, "loss": 1.6102, "step": 10940 }, { "epoch": 0.39182050960660375, "grad_norm": 1.714065670967102, "learning_rate": 0.00013884492168339963, "loss": 1.7329, "step": 10941 }, { "epoch": 0.391856321736172, "grad_norm": 1.4392451047897339, "learning_rate": 0.00013883423327812748, "loss": 1.3499, "step": 10942 }, { "epoch": 0.39189213386574034, "grad_norm": 1.5470072031021118, "learning_rate": 0.00013882354435039085, "loss": 1.1483, "step": 10943 }, { "epoch": 0.3919279459953086, "grad_norm": 1.6122442483901978, "learning_rate": 0.00013881285490033348, "loss": 1.3297, "step": 10944 }, { "epoch": 0.3919637581248769, "grad_norm": 1.8792885541915894, "learning_rate": 0.00013880216492809924, "loss": 1.7625, "step": 10945 }, { "epoch": 0.3919995702544452, "grad_norm": 1.8728435039520264, "learning_rate": 0.00013879147443383188, "loss": 1.2987, "step": 10946 }, { "epoch": 0.3920353823840135, "grad_norm": 1.4084367752075195, "learning_rate": 0.00013878078341767532, "loss": 1.3605, "step": 10947 }, { "epoch": 0.39207119451358174, "grad_norm": 2.4692366123199463, "learning_rate": 0.00013877009187977332, "loss": 1.8605, "step": 10948 }, { "epoch": 0.39210700664315, "grad_norm": 1.8761765956878662, "learning_rate": 0.00013875939982026976, "loss": 1.5695, "step": 10949 }, { "epoch": 0.39214281877271834, "grad_norm": 1.9340341091156006, "learning_rate": 0.00013874870723930847, "loss": 1.6881, "step": 10950 }, { "epoch": 0.3921786309022866, "grad_norm": 1.415968418121338, "learning_rate": 0.00013873801413703327, "loss": 1.4739, "step": 10951 }, { "epoch": 0.3922144430318549, "grad_norm": 1.7562787532806396, "learning_rate": 0.00013872732051358808, "loss": 1.7517, "step": 10952 }, { "epoch": 0.3922502551614232, "grad_norm": 1.4888852834701538, "learning_rate": 0.00013871662636911672, "loss": 1.5784, "step": 10953 }, { "epoch": 0.39228606729099147, "grad_norm": 1.862896203994751, "learning_rate": 0.00013870593170376317, "loss": 1.4573, "step": 10954 }, { "epoch": 0.39232187942055974, "grad_norm": 1.7131226062774658, "learning_rate": 0.00013869523651767116, "loss": 1.8789, "step": 10955 }, { "epoch": 0.392357691550128, "grad_norm": 1.4080753326416016, "learning_rate": 0.0001386845408109847, "loss": 1.4324, "step": 10956 }, { "epoch": 0.39239350367969633, "grad_norm": 1.5288814306259155, "learning_rate": 0.0001386738445838476, "loss": 1.4847, "step": 10957 }, { "epoch": 0.3924293158092646, "grad_norm": 1.7750880718231201, "learning_rate": 0.00013866314783640384, "loss": 1.6698, "step": 10958 }, { "epoch": 0.39246512793883287, "grad_norm": 1.8281431198120117, "learning_rate": 0.0001386524505687973, "loss": 1.4908, "step": 10959 }, { "epoch": 0.3925009400684012, "grad_norm": 1.838854432106018, "learning_rate": 0.00013864175278117187, "loss": 1.4854, "step": 10960 }, { "epoch": 0.39253675219796946, "grad_norm": 1.9536410570144653, "learning_rate": 0.00013863105447367154, "loss": 1.6562, "step": 10961 }, { "epoch": 0.39257256432753773, "grad_norm": 2.2680394649505615, "learning_rate": 0.00013862035564644017, "loss": 1.5296, "step": 10962 }, { "epoch": 0.392608376457106, "grad_norm": 1.7162909507751465, "learning_rate": 0.00013860965629962176, "loss": 1.8054, "step": 10963 }, { "epoch": 0.3926441885866743, "grad_norm": 1.5176362991333008, "learning_rate": 0.0001385989564333602, "loss": 1.6443, "step": 10964 }, { "epoch": 0.3926800007162426, "grad_norm": 1.4724088907241821, "learning_rate": 0.00013858825604779945, "loss": 1.7563, "step": 10965 }, { "epoch": 0.39271581284581086, "grad_norm": 2.1419851779937744, "learning_rate": 0.00013857755514308352, "loss": 1.6473, "step": 10966 }, { "epoch": 0.3927516249753792, "grad_norm": 1.5697284936904907, "learning_rate": 0.00013856685371935637, "loss": 1.694, "step": 10967 }, { "epoch": 0.39278743710494746, "grad_norm": 1.7596726417541504, "learning_rate": 0.00013855615177676191, "loss": 1.5056, "step": 10968 }, { "epoch": 0.3928232492345157, "grad_norm": 1.5144803524017334, "learning_rate": 0.0001385454493154442, "loss": 1.589, "step": 10969 }, { "epoch": 0.392859061364084, "grad_norm": 2.599090337753296, "learning_rate": 0.00013853474633554715, "loss": 1.4556, "step": 10970 }, { "epoch": 0.3928948734936523, "grad_norm": 1.5587658882141113, "learning_rate": 0.00013852404283721482, "loss": 1.5985, "step": 10971 }, { "epoch": 0.3929306856232206, "grad_norm": 1.9133366346359253, "learning_rate": 0.00013851333882059118, "loss": 1.2122, "step": 10972 }, { "epoch": 0.39296649775278886, "grad_norm": 2.5919742584228516, "learning_rate": 0.00013850263428582022, "loss": 1.6127, "step": 10973 }, { "epoch": 0.3930023098823572, "grad_norm": 1.6070665121078491, "learning_rate": 0.000138491929233046, "loss": 1.5863, "step": 10974 }, { "epoch": 0.39303812201192545, "grad_norm": 1.462077021598816, "learning_rate": 0.00013848122366241254, "loss": 1.6105, "step": 10975 }, { "epoch": 0.3930739341414937, "grad_norm": 2.1995644569396973, "learning_rate": 0.00013847051757406384, "loss": 1.6227, "step": 10976 }, { "epoch": 0.393109746271062, "grad_norm": 2.4004290103912354, "learning_rate": 0.00013845981096814397, "loss": 1.446, "step": 10977 }, { "epoch": 0.3931455584006303, "grad_norm": 1.6153998374938965, "learning_rate": 0.00013844910384479693, "loss": 1.5331, "step": 10978 }, { "epoch": 0.3931813705301986, "grad_norm": 1.8544167280197144, "learning_rate": 0.00013843839620416678, "loss": 1.8615, "step": 10979 }, { "epoch": 0.39321718265976685, "grad_norm": 1.9386411905288696, "learning_rate": 0.00013842768804639763, "loss": 1.3094, "step": 10980 }, { "epoch": 0.3932529947893352, "grad_norm": 1.922755479812622, "learning_rate": 0.00013841697937163344, "loss": 1.6329, "step": 10981 }, { "epoch": 0.39328880691890344, "grad_norm": 1.3943654298782349, "learning_rate": 0.0001384062701800184, "loss": 1.542, "step": 10982 }, { "epoch": 0.3933246190484717, "grad_norm": 1.7033149003982544, "learning_rate": 0.00013839556047169654, "loss": 1.6458, "step": 10983 }, { "epoch": 0.39336043117804, "grad_norm": 1.56367826461792, "learning_rate": 0.00013838485024681192, "loss": 1.6611, "step": 10984 }, { "epoch": 0.3933962433076083, "grad_norm": 1.4666606187820435, "learning_rate": 0.00013837413950550865, "loss": 1.7409, "step": 10985 }, { "epoch": 0.3934320554371766, "grad_norm": 1.5731333494186401, "learning_rate": 0.00013836342824793084, "loss": 0.9751, "step": 10986 }, { "epoch": 0.39346786756674484, "grad_norm": 1.75849449634552, "learning_rate": 0.00013835271647422262, "loss": 1.4298, "step": 10987 }, { "epoch": 0.39350367969631317, "grad_norm": 1.5057103633880615, "learning_rate": 0.000138342004184528, "loss": 1.4403, "step": 10988 }, { "epoch": 0.39353949182588144, "grad_norm": 1.6212468147277832, "learning_rate": 0.00013833129137899122, "loss": 1.2772, "step": 10989 }, { "epoch": 0.3935753039554497, "grad_norm": 3.099400758743286, "learning_rate": 0.00013832057805775636, "loss": 1.8536, "step": 10990 }, { "epoch": 0.393611116085018, "grad_norm": 1.6015698909759521, "learning_rate": 0.00013830986422096756, "loss": 1.5765, "step": 10991 }, { "epoch": 0.3936469282145863, "grad_norm": 1.789380669593811, "learning_rate": 0.0001382991498687689, "loss": 1.57, "step": 10992 }, { "epoch": 0.39368274034415457, "grad_norm": 1.9253126382827759, "learning_rate": 0.00013828843500130462, "loss": 1.7023, "step": 10993 }, { "epoch": 0.39371855247372284, "grad_norm": 1.3724284172058105, "learning_rate": 0.00013827771961871885, "loss": 1.3947, "step": 10994 }, { "epoch": 0.3937543646032911, "grad_norm": 1.8409966230392456, "learning_rate": 0.0001382670037211557, "loss": 1.3981, "step": 10995 }, { "epoch": 0.39379017673285943, "grad_norm": 1.4560490846633911, "learning_rate": 0.0001382562873087594, "loss": 1.5862, "step": 10996 }, { "epoch": 0.3938259888624277, "grad_norm": 2.6296908855438232, "learning_rate": 0.00013824557038167408, "loss": 1.4751, "step": 10997 }, { "epoch": 0.39386180099199597, "grad_norm": 1.4091401100158691, "learning_rate": 0.00013823485294004397, "loss": 1.2807, "step": 10998 }, { "epoch": 0.3938976131215643, "grad_norm": 2.486598253250122, "learning_rate": 0.00013822413498401322, "loss": 1.4194, "step": 10999 }, { "epoch": 0.39393342525113256, "grad_norm": 1.8565677404403687, "learning_rate": 0.00013821341651372603, "loss": 1.4072, "step": 11000 }, { "epoch": 0.39396923738070083, "grad_norm": 1.5582574605941772, "learning_rate": 0.00013820269752932662, "loss": 1.1239, "step": 11001 }, { "epoch": 0.3940050495102691, "grad_norm": 1.714348316192627, "learning_rate": 0.0001381919780309592, "loss": 1.5673, "step": 11002 }, { "epoch": 0.3940408616398374, "grad_norm": 2.184767723083496, "learning_rate": 0.000138181258018768, "loss": 1.8622, "step": 11003 }, { "epoch": 0.3940766737694057, "grad_norm": 1.768456220626831, "learning_rate": 0.00013817053749289718, "loss": 1.511, "step": 11004 }, { "epoch": 0.39411248589897396, "grad_norm": 1.9604367017745972, "learning_rate": 0.00013815981645349105, "loss": 1.5908, "step": 11005 }, { "epoch": 0.3941482980285423, "grad_norm": 1.6243501901626587, "learning_rate": 0.00013814909490069378, "loss": 1.4674, "step": 11006 }, { "epoch": 0.39418411015811056, "grad_norm": 1.532596468925476, "learning_rate": 0.00013813837283464968, "loss": 1.5244, "step": 11007 }, { "epoch": 0.3942199222876788, "grad_norm": 2.020131826400757, "learning_rate": 0.00013812765025550294, "loss": 1.3743, "step": 11008 }, { "epoch": 0.3942557344172471, "grad_norm": 1.766100287437439, "learning_rate": 0.00013811692716339785, "loss": 1.5782, "step": 11009 }, { "epoch": 0.3942915465468154, "grad_norm": 1.5012962818145752, "learning_rate": 0.00013810620355847868, "loss": 1.5708, "step": 11010 }, { "epoch": 0.3943273586763837, "grad_norm": 1.4849494695663452, "learning_rate": 0.00013809547944088968, "loss": 1.9724, "step": 11011 }, { "epoch": 0.39436317080595196, "grad_norm": 2.92488694190979, "learning_rate": 0.00013808475481077518, "loss": 1.7187, "step": 11012 }, { "epoch": 0.3943989829355203, "grad_norm": 1.5572278499603271, "learning_rate": 0.00013807402966827944, "loss": 1.4533, "step": 11013 }, { "epoch": 0.39443479506508855, "grad_norm": 1.4926722049713135, "learning_rate": 0.00013806330401354671, "loss": 1.3641, "step": 11014 }, { "epoch": 0.3944706071946568, "grad_norm": 1.7692962884902954, "learning_rate": 0.0001380525778467213, "loss": 1.3569, "step": 11015 }, { "epoch": 0.3945064193242251, "grad_norm": 1.5108805894851685, "learning_rate": 0.00013804185116794755, "loss": 1.4749, "step": 11016 }, { "epoch": 0.3945422314537934, "grad_norm": 2.093026876449585, "learning_rate": 0.00013803112397736976, "loss": 1.4546, "step": 11017 }, { "epoch": 0.3945780435833617, "grad_norm": 1.4690407514572144, "learning_rate": 0.0001380203962751323, "loss": 1.2591, "step": 11018 }, { "epoch": 0.39461385571292995, "grad_norm": 1.3096941709518433, "learning_rate": 0.0001380096680613794, "loss": 1.4565, "step": 11019 }, { "epoch": 0.3946496678424983, "grad_norm": 1.9637916088104248, "learning_rate": 0.00013799893933625547, "loss": 1.2479, "step": 11020 }, { "epoch": 0.39468547997206654, "grad_norm": 1.3697775602340698, "learning_rate": 0.00013798821009990486, "loss": 1.457, "step": 11021 }, { "epoch": 0.3947212921016348, "grad_norm": 1.6081949472427368, "learning_rate": 0.00013797748035247184, "loss": 1.509, "step": 11022 }, { "epoch": 0.3947571042312031, "grad_norm": 2.8257575035095215, "learning_rate": 0.0001379667500941008, "loss": 1.5442, "step": 11023 }, { "epoch": 0.3947929163607714, "grad_norm": 1.9925357103347778, "learning_rate": 0.00013795601932493613, "loss": 1.6229, "step": 11024 }, { "epoch": 0.3948287284903397, "grad_norm": 1.7090239524841309, "learning_rate": 0.0001379452880451222, "loss": 1.4409, "step": 11025 }, { "epoch": 0.39486454061990794, "grad_norm": 1.6960958242416382, "learning_rate": 0.00013793455625480332, "loss": 1.4845, "step": 11026 }, { "epoch": 0.39490035274947627, "grad_norm": 1.5618412494659424, "learning_rate": 0.00013792382395412392, "loss": 1.8159, "step": 11027 }, { "epoch": 0.39493616487904454, "grad_norm": 1.413428783416748, "learning_rate": 0.00013791309114322841, "loss": 1.2555, "step": 11028 }, { "epoch": 0.3949719770086128, "grad_norm": 2.023017644882202, "learning_rate": 0.00013790235782226118, "loss": 1.5708, "step": 11029 }, { "epoch": 0.3950077891381811, "grad_norm": 1.610162615776062, "learning_rate": 0.0001378916239913666, "loss": 1.7002, "step": 11030 }, { "epoch": 0.3950436012677494, "grad_norm": 1.5207340717315674, "learning_rate": 0.0001378808896506891, "loss": 1.4454, "step": 11031 }, { "epoch": 0.39507941339731767, "grad_norm": 1.6515626907348633, "learning_rate": 0.00013787015480037307, "loss": 1.5858, "step": 11032 }, { "epoch": 0.39511522552688594, "grad_norm": 2.073228359222412, "learning_rate": 0.00013785941944056298, "loss": 1.5202, "step": 11033 }, { "epoch": 0.39515103765645426, "grad_norm": 1.6093931198120117, "learning_rate": 0.00013784868357140322, "loss": 1.6465, "step": 11034 }, { "epoch": 0.39518684978602253, "grad_norm": 2.0961008071899414, "learning_rate": 0.00013783794719303825, "loss": 1.6145, "step": 11035 }, { "epoch": 0.3952226619155908, "grad_norm": 1.5990267992019653, "learning_rate": 0.0001378272103056125, "loss": 1.6946, "step": 11036 }, { "epoch": 0.39525847404515907, "grad_norm": 1.5354348421096802, "learning_rate": 0.00013781647290927044, "loss": 1.4329, "step": 11037 }, { "epoch": 0.3952942861747274, "grad_norm": 1.4117317199707031, "learning_rate": 0.00013780573500415654, "loss": 1.4226, "step": 11038 }, { "epoch": 0.39533009830429566, "grad_norm": 1.3799502849578857, "learning_rate": 0.0001377949965904152, "loss": 1.3842, "step": 11039 }, { "epoch": 0.39536591043386393, "grad_norm": 1.3154661655426025, "learning_rate": 0.00013778425766819096, "loss": 1.3348, "step": 11040 }, { "epoch": 0.39540172256343226, "grad_norm": 2.150657892227173, "learning_rate": 0.00013777351823762826, "loss": 1.652, "step": 11041 }, { "epoch": 0.3954375346930005, "grad_norm": 1.667708396911621, "learning_rate": 0.0001377627782988716, "loss": 1.3867, "step": 11042 }, { "epoch": 0.3954733468225688, "grad_norm": 1.616040825843811, "learning_rate": 0.00013775203785206544, "loss": 1.2286, "step": 11043 }, { "epoch": 0.39550915895213706, "grad_norm": 1.6211915016174316, "learning_rate": 0.00013774129689735437, "loss": 1.3339, "step": 11044 }, { "epoch": 0.3955449710817054, "grad_norm": 2.3818204402923584, "learning_rate": 0.00013773055543488276, "loss": 1.5299, "step": 11045 }, { "epoch": 0.39558078321127366, "grad_norm": 1.4598685503005981, "learning_rate": 0.00013771981346479524, "loss": 1.5545, "step": 11046 }, { "epoch": 0.3956165953408419, "grad_norm": 2.1078193187713623, "learning_rate": 0.0001377090709872363, "loss": 1.712, "step": 11047 }, { "epoch": 0.39565240747041025, "grad_norm": 1.702226161956787, "learning_rate": 0.00013769832800235041, "loss": 1.4261, "step": 11048 }, { "epoch": 0.3956882195999785, "grad_norm": 1.6927379369735718, "learning_rate": 0.00013768758451028216, "loss": 1.5355, "step": 11049 }, { "epoch": 0.3957240317295468, "grad_norm": 1.7418303489685059, "learning_rate": 0.00013767684051117605, "loss": 1.5623, "step": 11050 }, { "epoch": 0.39575984385911506, "grad_norm": 1.6147509813308716, "learning_rate": 0.0001376660960051767, "loss": 1.7547, "step": 11051 }, { "epoch": 0.3957956559886834, "grad_norm": 1.5473684072494507, "learning_rate": 0.0001376553509924286, "loss": 1.4403, "step": 11052 }, { "epoch": 0.39583146811825165, "grad_norm": 1.8391984701156616, "learning_rate": 0.00013764460547307632, "loss": 1.6435, "step": 11053 }, { "epoch": 0.3958672802478199, "grad_norm": 1.5264986753463745, "learning_rate": 0.0001376338594472644, "loss": 1.4671, "step": 11054 }, { "epoch": 0.39590309237738824, "grad_norm": 2.0358102321624756, "learning_rate": 0.00013762311291513747, "loss": 1.5083, "step": 11055 }, { "epoch": 0.3959389045069565, "grad_norm": 1.842419147491455, "learning_rate": 0.0001376123658768401, "loss": 1.5692, "step": 11056 }, { "epoch": 0.3959747166365248, "grad_norm": 1.4179729223251343, "learning_rate": 0.00013760161833251683, "loss": 1.4895, "step": 11057 }, { "epoch": 0.39601052876609305, "grad_norm": 1.3994336128234863, "learning_rate": 0.00013759087028231232, "loss": 1.3776, "step": 11058 }, { "epoch": 0.3960463408956614, "grad_norm": 2.0251388549804688, "learning_rate": 0.0001375801217263711, "loss": 1.6126, "step": 11059 }, { "epoch": 0.39608215302522964, "grad_norm": 1.455640435218811, "learning_rate": 0.00013756937266483788, "loss": 1.513, "step": 11060 }, { "epoch": 0.3961179651547979, "grad_norm": 1.6252317428588867, "learning_rate": 0.00013755862309785716, "loss": 1.6759, "step": 11061 }, { "epoch": 0.39615377728436624, "grad_norm": 2.397956609725952, "learning_rate": 0.00013754787302557364, "loss": 1.5216, "step": 11062 }, { "epoch": 0.3961895894139345, "grad_norm": 1.762626051902771, "learning_rate": 0.0001375371224481319, "loss": 1.7711, "step": 11063 }, { "epoch": 0.3962254015435028, "grad_norm": 1.7544844150543213, "learning_rate": 0.0001375263713656766, "loss": 1.3992, "step": 11064 }, { "epoch": 0.39626121367307104, "grad_norm": 1.8529858589172363, "learning_rate": 0.00013751561977835242, "loss": 1.9665, "step": 11065 }, { "epoch": 0.39629702580263937, "grad_norm": 1.527550220489502, "learning_rate": 0.00013750486768630393, "loss": 1.6083, "step": 11066 }, { "epoch": 0.39633283793220764, "grad_norm": 1.813584327697754, "learning_rate": 0.00013749411508967582, "loss": 1.5276, "step": 11067 }, { "epoch": 0.3963686500617759, "grad_norm": 2.098923921585083, "learning_rate": 0.0001374833619886128, "loss": 2.1238, "step": 11068 }, { "epoch": 0.39640446219134423, "grad_norm": 1.9573220014572144, "learning_rate": 0.0001374726083832594, "loss": 1.4054, "step": 11069 }, { "epoch": 0.3964402743209125, "grad_norm": 1.5510364770889282, "learning_rate": 0.00013746185427376047, "loss": 1.4817, "step": 11070 }, { "epoch": 0.39647608645048077, "grad_norm": 1.4360061883926392, "learning_rate": 0.00013745109966026056, "loss": 1.533, "step": 11071 }, { "epoch": 0.39651189858004904, "grad_norm": 1.4258822202682495, "learning_rate": 0.00013744034454290447, "loss": 1.6174, "step": 11072 }, { "epoch": 0.39654771070961736, "grad_norm": 1.630110740661621, "learning_rate": 0.0001374295889218368, "loss": 1.4781, "step": 11073 }, { "epoch": 0.39658352283918563, "grad_norm": 1.3876277208328247, "learning_rate": 0.0001374188327972023, "loss": 1.4681, "step": 11074 }, { "epoch": 0.3966193349687539, "grad_norm": 1.5342926979064941, "learning_rate": 0.0001374080761691457, "loss": 1.5694, "step": 11075 }, { "epoch": 0.3966551470983222, "grad_norm": 1.8465675115585327, "learning_rate": 0.00013739731903781164, "loss": 1.4775, "step": 11076 }, { "epoch": 0.3966909592278905, "grad_norm": 1.8121492862701416, "learning_rate": 0.00013738656140334493, "loss": 1.4287, "step": 11077 }, { "epoch": 0.39672677135745876, "grad_norm": 1.8412760496139526, "learning_rate": 0.00013737580326589024, "loss": 1.7312, "step": 11078 }, { "epoch": 0.39676258348702703, "grad_norm": 2.1565401554107666, "learning_rate": 0.00013736504462559234, "loss": 1.4282, "step": 11079 }, { "epoch": 0.39679839561659536, "grad_norm": 1.9769231081008911, "learning_rate": 0.00013735428548259594, "loss": 1.3531, "step": 11080 }, { "epoch": 0.3968342077461636, "grad_norm": 2.425816535949707, "learning_rate": 0.00013734352583704587, "loss": 1.6955, "step": 11081 }, { "epoch": 0.3968700198757319, "grad_norm": 1.4811224937438965, "learning_rate": 0.00013733276568908678, "loss": 1.7023, "step": 11082 }, { "epoch": 0.3969058320053002, "grad_norm": 1.5459316968917847, "learning_rate": 0.00013732200503886348, "loss": 1.3232, "step": 11083 }, { "epoch": 0.3969416441348685, "grad_norm": 1.856197476387024, "learning_rate": 0.00013731124388652077, "loss": 1.5471, "step": 11084 }, { "epoch": 0.39697745626443676, "grad_norm": 1.9269754886627197, "learning_rate": 0.00013730048223220336, "loss": 1.4477, "step": 11085 }, { "epoch": 0.397013268394005, "grad_norm": 1.5941188335418701, "learning_rate": 0.00013728972007605612, "loss": 1.5713, "step": 11086 }, { "epoch": 0.39704908052357335, "grad_norm": 1.502655267715454, "learning_rate": 0.00013727895741822379, "loss": 1.3498, "step": 11087 }, { "epoch": 0.3970848926531416, "grad_norm": 1.3070005178451538, "learning_rate": 0.00013726819425885115, "loss": 1.5596, "step": 11088 }, { "epoch": 0.3971207047827099, "grad_norm": 1.96981680393219, "learning_rate": 0.00013725743059808305, "loss": 1.588, "step": 11089 }, { "epoch": 0.3971565169122782, "grad_norm": 2.043673276901245, "learning_rate": 0.00013724666643606429, "loss": 1.6474, "step": 11090 }, { "epoch": 0.3971923290418465, "grad_norm": 1.5892807245254517, "learning_rate": 0.00013723590177293967, "loss": 1.5802, "step": 11091 }, { "epoch": 0.39722814117141475, "grad_norm": 2.098597764968872, "learning_rate": 0.000137225136608854, "loss": 1.8254, "step": 11092 }, { "epoch": 0.397263953300983, "grad_norm": 1.9812650680541992, "learning_rate": 0.00013721437094395213, "loss": 1.4145, "step": 11093 }, { "epoch": 0.39729976543055134, "grad_norm": 2.5424814224243164, "learning_rate": 0.00013720360477837893, "loss": 1.5099, "step": 11094 }, { "epoch": 0.3973355775601196, "grad_norm": 1.5709184408187866, "learning_rate": 0.00013719283811227921, "loss": 1.6173, "step": 11095 }, { "epoch": 0.3973713896896879, "grad_norm": 1.7686853408813477, "learning_rate": 0.0001371820709457978, "loss": 1.8625, "step": 11096 }, { "epoch": 0.3974072018192562, "grad_norm": 2.2390694618225098, "learning_rate": 0.00013717130327907964, "loss": 1.6054, "step": 11097 }, { "epoch": 0.3974430139488245, "grad_norm": 2.229501247406006, "learning_rate": 0.00013716053511226949, "loss": 1.4254, "step": 11098 }, { "epoch": 0.39747882607839274, "grad_norm": 2.011171817779541, "learning_rate": 0.00013714976644551232, "loss": 1.4815, "step": 11099 }, { "epoch": 0.397514638207961, "grad_norm": 1.347294569015503, "learning_rate": 0.00013713899727895294, "loss": 1.3786, "step": 11100 }, { "epoch": 0.39755045033752934, "grad_norm": 1.8997390270233154, "learning_rate": 0.00013712822761273625, "loss": 1.6241, "step": 11101 }, { "epoch": 0.3975862624670976, "grad_norm": 1.4208718538284302, "learning_rate": 0.00013711745744700714, "loss": 1.5554, "step": 11102 }, { "epoch": 0.3976220745966659, "grad_norm": 1.6987639665603638, "learning_rate": 0.00013710668678191054, "loss": 1.5971, "step": 11103 }, { "epoch": 0.3976578867262342, "grad_norm": 2.2498080730438232, "learning_rate": 0.00013709591561759135, "loss": 1.378, "step": 11104 }, { "epoch": 0.39769369885580247, "grad_norm": 1.919611930847168, "learning_rate": 0.00013708514395419444, "loss": 1.8187, "step": 11105 }, { "epoch": 0.39772951098537074, "grad_norm": 1.7510775327682495, "learning_rate": 0.00013707437179186476, "loss": 1.4297, "step": 11106 }, { "epoch": 0.397765323114939, "grad_norm": 1.8031326532363892, "learning_rate": 0.0001370635991307472, "loss": 1.6006, "step": 11107 }, { "epoch": 0.39780113524450733, "grad_norm": 1.6740226745605469, "learning_rate": 0.00013705282597098676, "loss": 1.2622, "step": 11108 }, { "epoch": 0.3978369473740756, "grad_norm": 1.7174181938171387, "learning_rate": 0.00013704205231272835, "loss": 1.7329, "step": 11109 }, { "epoch": 0.39787275950364387, "grad_norm": 1.7932454347610474, "learning_rate": 0.0001370312781561169, "loss": 1.6485, "step": 11110 }, { "epoch": 0.3979085716332122, "grad_norm": 1.698287010192871, "learning_rate": 0.00013702050350129734, "loss": 1.6169, "step": 11111 }, { "epoch": 0.39794438376278046, "grad_norm": 1.7249375581741333, "learning_rate": 0.00013700972834841468, "loss": 1.9336, "step": 11112 }, { "epoch": 0.39798019589234873, "grad_norm": 1.4442729949951172, "learning_rate": 0.00013699895269761385, "loss": 1.2067, "step": 11113 }, { "epoch": 0.398016008021917, "grad_norm": 1.2577317953109741, "learning_rate": 0.00013698817654903984, "loss": 1.5325, "step": 11114 }, { "epoch": 0.3980518201514853, "grad_norm": 1.5931488275527954, "learning_rate": 0.00013697739990283764, "loss": 1.568, "step": 11115 }, { "epoch": 0.3980876322810536, "grad_norm": 1.8899900913238525, "learning_rate": 0.00013696662275915222, "loss": 1.2821, "step": 11116 }, { "epoch": 0.39812344441062186, "grad_norm": 1.9897520542144775, "learning_rate": 0.00013695584511812857, "loss": 1.5535, "step": 11117 }, { "epoch": 0.3981592565401902, "grad_norm": 2.0698153972625732, "learning_rate": 0.00013694506697991169, "loss": 1.5855, "step": 11118 }, { "epoch": 0.39819506866975846, "grad_norm": 2.299964427947998, "learning_rate": 0.00013693428834464662, "loss": 1.5469, "step": 11119 }, { "epoch": 0.3982308807993267, "grad_norm": 1.849847674369812, "learning_rate": 0.00013692350921247829, "loss": 1.4537, "step": 11120 }, { "epoch": 0.398266692928895, "grad_norm": 1.605886459350586, "learning_rate": 0.00013691272958355182, "loss": 1.6433, "step": 11121 }, { "epoch": 0.3983025050584633, "grad_norm": 1.4784764051437378, "learning_rate": 0.00013690194945801214, "loss": 1.6726, "step": 11122 }, { "epoch": 0.3983383171880316, "grad_norm": 2.1290183067321777, "learning_rate": 0.00013689116883600436, "loss": 1.6535, "step": 11123 }, { "epoch": 0.39837412931759986, "grad_norm": 1.8331060409545898, "learning_rate": 0.00013688038771767345, "loss": 1.5419, "step": 11124 }, { "epoch": 0.3984099414471682, "grad_norm": 1.70211660861969, "learning_rate": 0.00013686960610316456, "loss": 1.2083, "step": 11125 }, { "epoch": 0.39844575357673645, "grad_norm": 1.9089553356170654, "learning_rate": 0.00013685882399262265, "loss": 1.8192, "step": 11126 }, { "epoch": 0.3984815657063047, "grad_norm": 1.5048595666885376, "learning_rate": 0.0001368480413861928, "loss": 1.5921, "step": 11127 }, { "epoch": 0.398517377835873, "grad_norm": 1.579541802406311, "learning_rate": 0.0001368372582840201, "loss": 1.5551, "step": 11128 }, { "epoch": 0.3985531899654413, "grad_norm": 1.3262720108032227, "learning_rate": 0.00013682647468624958, "loss": 1.4928, "step": 11129 }, { "epoch": 0.3985890020950096, "grad_norm": 1.7601323127746582, "learning_rate": 0.00013681569059302638, "loss": 1.5003, "step": 11130 }, { "epoch": 0.39862481422457785, "grad_norm": 2.1725378036499023, "learning_rate": 0.00013680490600449552, "loss": 1.6579, "step": 11131 }, { "epoch": 0.3986606263541462, "grad_norm": 1.540235161781311, "learning_rate": 0.00013679412092080213, "loss": 1.5752, "step": 11132 }, { "epoch": 0.39869643848371444, "grad_norm": 1.5734163522720337, "learning_rate": 0.0001367833353420913, "loss": 1.7024, "step": 11133 }, { "epoch": 0.3987322506132827, "grad_norm": 1.3746634721755981, "learning_rate": 0.00013677254926850818, "loss": 1.53, "step": 11134 }, { "epoch": 0.398768062742851, "grad_norm": 1.5181868076324463, "learning_rate": 0.0001367617627001978, "loss": 1.4935, "step": 11135 }, { "epoch": 0.3988038748724193, "grad_norm": 1.9445048570632935, "learning_rate": 0.0001367509756373053, "loss": 1.541, "step": 11136 }, { "epoch": 0.3988396870019876, "grad_norm": 1.680619478225708, "learning_rate": 0.0001367401880799759, "loss": 1.596, "step": 11137 }, { "epoch": 0.39887549913155584, "grad_norm": 1.310683250427246, "learning_rate": 0.0001367294000283546, "loss": 1.5233, "step": 11138 }, { "epoch": 0.39891131126112417, "grad_norm": 1.5829931497573853, "learning_rate": 0.00013671861148258665, "loss": 1.6821, "step": 11139 }, { "epoch": 0.39894712339069244, "grad_norm": 1.4462175369262695, "learning_rate": 0.0001367078224428171, "loss": 1.5534, "step": 11140 }, { "epoch": 0.3989829355202607, "grad_norm": 1.4816615581512451, "learning_rate": 0.00013669703290919118, "loss": 1.5118, "step": 11141 }, { "epoch": 0.399018747649829, "grad_norm": 1.906777262687683, "learning_rate": 0.00013668624288185402, "loss": 1.8655, "step": 11142 }, { "epoch": 0.3990545597793973, "grad_norm": 1.6060969829559326, "learning_rate": 0.00013667545236095076, "loss": 1.4475, "step": 11143 }, { "epoch": 0.39909037190896557, "grad_norm": 1.6323474645614624, "learning_rate": 0.00013666466134662662, "loss": 1.6315, "step": 11144 }, { "epoch": 0.39912618403853384, "grad_norm": 1.5477476119995117, "learning_rate": 0.00013665386983902672, "loss": 1.7228, "step": 11145 }, { "epoch": 0.39916199616810216, "grad_norm": 1.9727189540863037, "learning_rate": 0.00013664307783829634, "loss": 1.5455, "step": 11146 }, { "epoch": 0.39919780829767043, "grad_norm": 1.7289396524429321, "learning_rate": 0.00013663228534458054, "loss": 1.5659, "step": 11147 }, { "epoch": 0.3992336204272387, "grad_norm": 1.9361799955368042, "learning_rate": 0.00013662149235802465, "loss": 1.4145, "step": 11148 }, { "epoch": 0.39926943255680697, "grad_norm": 1.8130748271942139, "learning_rate": 0.0001366106988787738, "loss": 1.8146, "step": 11149 }, { "epoch": 0.3993052446863753, "grad_norm": 1.4281504154205322, "learning_rate": 0.00013659990490697322, "loss": 1.4696, "step": 11150 }, { "epoch": 0.39934105681594356, "grad_norm": 1.5529637336730957, "learning_rate": 0.0001365891104427681, "loss": 1.6064, "step": 11151 }, { "epoch": 0.39937686894551183, "grad_norm": 1.484028697013855, "learning_rate": 0.00013657831548630377, "loss": 1.5061, "step": 11152 }, { "epoch": 0.39941268107508016, "grad_norm": 1.630724310874939, "learning_rate": 0.00013656752003772535, "loss": 1.5966, "step": 11153 }, { "epoch": 0.3994484932046484, "grad_norm": 1.61417818069458, "learning_rate": 0.00013655672409717813, "loss": 1.4266, "step": 11154 }, { "epoch": 0.3994843053342167, "grad_norm": 3.1259992122650146, "learning_rate": 0.0001365459276648073, "loss": 1.6252, "step": 11155 }, { "epoch": 0.39952011746378496, "grad_norm": 1.3071650266647339, "learning_rate": 0.00013653513074075816, "loss": 1.5813, "step": 11156 }, { "epoch": 0.3995559295933533, "grad_norm": 1.3977751731872559, "learning_rate": 0.000136524333325176, "loss": 1.6765, "step": 11157 }, { "epoch": 0.39959174172292156, "grad_norm": 1.4183727502822876, "learning_rate": 0.00013651353541820603, "loss": 1.299, "step": 11158 }, { "epoch": 0.3996275538524898, "grad_norm": 1.6623196601867676, "learning_rate": 0.00013650273701999353, "loss": 1.4861, "step": 11159 }, { "epoch": 0.39966336598205815, "grad_norm": 1.5375479459762573, "learning_rate": 0.0001364919381306838, "loss": 1.7469, "step": 11160 }, { "epoch": 0.3996991781116264, "grad_norm": 1.6787738800048828, "learning_rate": 0.00013648113875042213, "loss": 1.4347, "step": 11161 }, { "epoch": 0.3997349902411947, "grad_norm": 2.2604610919952393, "learning_rate": 0.00013647033887935378, "loss": 1.3712, "step": 11162 }, { "epoch": 0.39977080237076296, "grad_norm": 2.34169340133667, "learning_rate": 0.00013645953851762406, "loss": 1.1879, "step": 11163 }, { "epoch": 0.3998066145003313, "grad_norm": 1.8770477771759033, "learning_rate": 0.00013644873766537828, "loss": 1.5683, "step": 11164 }, { "epoch": 0.39984242662989955, "grad_norm": 1.317833423614502, "learning_rate": 0.00013643793632276175, "loss": 1.0902, "step": 11165 }, { "epoch": 0.3998782387594678, "grad_norm": 1.86030912399292, "learning_rate": 0.00013642713448991977, "loss": 1.0972, "step": 11166 }, { "epoch": 0.39991405088903614, "grad_norm": 3.0373973846435547, "learning_rate": 0.0001364163321669977, "loss": 1.9666, "step": 11167 }, { "epoch": 0.3999498630186044, "grad_norm": 1.878786563873291, "learning_rate": 0.00013640552935414085, "loss": 1.4069, "step": 11168 }, { "epoch": 0.3999856751481727, "grad_norm": 1.554965615272522, "learning_rate": 0.00013639472605149456, "loss": 1.3437, "step": 11169 }, { "epoch": 0.40002148727774095, "grad_norm": 1.9674904346466064, "learning_rate": 0.00013638392225920418, "loss": 1.7638, "step": 11170 }, { "epoch": 0.4000572994073093, "grad_norm": 1.3602914810180664, "learning_rate": 0.00013637311797741507, "loss": 1.4951, "step": 11171 }, { "epoch": 0.40009311153687754, "grad_norm": 2.014244318008423, "learning_rate": 0.00013636231320627258, "loss": 1.5531, "step": 11172 }, { "epoch": 0.4001289236664458, "grad_norm": 1.8055243492126465, "learning_rate": 0.00013635150794592205, "loss": 1.1671, "step": 11173 }, { "epoch": 0.40016473579601414, "grad_norm": 1.5012273788452148, "learning_rate": 0.00013634070219650888, "loss": 1.5464, "step": 11174 }, { "epoch": 0.4002005479255824, "grad_norm": 1.5045928955078125, "learning_rate": 0.00013632989595817842, "loss": 1.1954, "step": 11175 }, { "epoch": 0.4002363600551507, "grad_norm": 1.548518180847168, "learning_rate": 0.0001363190892310761, "loss": 1.3469, "step": 11176 }, { "epoch": 0.40027217218471894, "grad_norm": 1.9656542539596558, "learning_rate": 0.00013630828201534727, "loss": 1.4379, "step": 11177 }, { "epoch": 0.40030798431428727, "grad_norm": 1.4483617544174194, "learning_rate": 0.00013629747431113734, "loss": 1.7661, "step": 11178 }, { "epoch": 0.40034379644385554, "grad_norm": 2.1301279067993164, "learning_rate": 0.0001362866661185917, "loss": 1.5943, "step": 11179 }, { "epoch": 0.4003796085734238, "grad_norm": 1.7540024518966675, "learning_rate": 0.00013627585743785582, "loss": 1.6543, "step": 11180 }, { "epoch": 0.40041542070299213, "grad_norm": 1.4993693828582764, "learning_rate": 0.000136265048269075, "loss": 1.4884, "step": 11181 }, { "epoch": 0.4004512328325604, "grad_norm": 1.55181086063385, "learning_rate": 0.0001362542386123948, "loss": 1.3658, "step": 11182 }, { "epoch": 0.40048704496212867, "grad_norm": 1.8561002016067505, "learning_rate": 0.00013624342846796058, "loss": 1.321, "step": 11183 }, { "epoch": 0.40052285709169694, "grad_norm": 1.5602530241012573, "learning_rate": 0.00013623261783591773, "loss": 1.6882, "step": 11184 }, { "epoch": 0.40055866922126526, "grad_norm": 1.8052551746368408, "learning_rate": 0.00013622180671641178, "loss": 1.3348, "step": 11185 }, { "epoch": 0.40059448135083353, "grad_norm": 1.9859580993652344, "learning_rate": 0.0001362109951095881, "loss": 1.6761, "step": 11186 }, { "epoch": 0.4006302934804018, "grad_norm": 1.7264128923416138, "learning_rate": 0.00013620018301559224, "loss": 1.5664, "step": 11187 }, { "epoch": 0.4006661056099701, "grad_norm": 1.941522240638733, "learning_rate": 0.0001361893704345696, "loss": 1.5823, "step": 11188 }, { "epoch": 0.4007019177395384, "grad_norm": 1.5751817226409912, "learning_rate": 0.00013617855736666566, "loss": 1.5329, "step": 11189 }, { "epoch": 0.40073772986910666, "grad_norm": 2.8278791904449463, "learning_rate": 0.00013616774381202591, "loss": 1.644, "step": 11190 }, { "epoch": 0.40077354199867493, "grad_norm": 1.5788226127624512, "learning_rate": 0.00013615692977079577, "loss": 1.3641, "step": 11191 }, { "epoch": 0.40080935412824326, "grad_norm": 1.462515115737915, "learning_rate": 0.00013614611524312084, "loss": 1.6041, "step": 11192 }, { "epoch": 0.4008451662578115, "grad_norm": 1.6150527000427246, "learning_rate": 0.0001361353002291465, "loss": 1.4713, "step": 11193 }, { "epoch": 0.4008809783873798, "grad_norm": 1.730208158493042, "learning_rate": 0.00013612448472901834, "loss": 1.5619, "step": 11194 }, { "epoch": 0.40091679051694806, "grad_norm": 1.573136329650879, "learning_rate": 0.00013611366874288186, "loss": 1.8576, "step": 11195 }, { "epoch": 0.4009526026465164, "grad_norm": 1.67069411277771, "learning_rate": 0.0001361028522708825, "loss": 1.6783, "step": 11196 }, { "epoch": 0.40098841477608466, "grad_norm": 1.7830244302749634, "learning_rate": 0.00013609203531316587, "loss": 1.5543, "step": 11197 }, { "epoch": 0.4010242269056529, "grad_norm": 1.4327481985092163, "learning_rate": 0.0001360812178698774, "loss": 1.1594, "step": 11198 }, { "epoch": 0.40106003903522125, "grad_norm": 1.6612839698791504, "learning_rate": 0.00013607039994116278, "loss": 1.4161, "step": 11199 }, { "epoch": 0.4010958511647895, "grad_norm": 2.0589075088500977, "learning_rate": 0.00013605958152716738, "loss": 1.7591, "step": 11200 }, { "epoch": 0.4011316632943578, "grad_norm": 1.7634742259979248, "learning_rate": 0.00013604876262803686, "loss": 1.3869, "step": 11201 }, { "epoch": 0.40116747542392606, "grad_norm": 2.3785300254821777, "learning_rate": 0.00013603794324391672, "loss": 1.3486, "step": 11202 }, { "epoch": 0.4012032875534944, "grad_norm": 1.635969877243042, "learning_rate": 0.00013602712337495255, "loss": 1.3801, "step": 11203 }, { "epoch": 0.40123909968306265, "grad_norm": 1.580276370048523, "learning_rate": 0.0001360163030212899, "loss": 1.6318, "step": 11204 }, { "epoch": 0.4012749118126309, "grad_norm": 1.8174611330032349, "learning_rate": 0.00013600548218307436, "loss": 1.3491, "step": 11205 }, { "epoch": 0.40131072394219924, "grad_norm": 1.3981926441192627, "learning_rate": 0.0001359946608604515, "loss": 1.6881, "step": 11206 }, { "epoch": 0.4013465360717675, "grad_norm": 1.802919864654541, "learning_rate": 0.00013598383905356692, "loss": 1.2794, "step": 11207 }, { "epoch": 0.4013823482013358, "grad_norm": 1.6066579818725586, "learning_rate": 0.00013597301676256617, "loss": 1.356, "step": 11208 }, { "epoch": 0.40141816033090405, "grad_norm": 1.9527837038040161, "learning_rate": 0.0001359621939875949, "loss": 1.5963, "step": 11209 }, { "epoch": 0.4014539724604724, "grad_norm": 1.324955701828003, "learning_rate": 0.00013595137072879867, "loss": 1.2462, "step": 11210 }, { "epoch": 0.40148978459004064, "grad_norm": 1.5357877016067505, "learning_rate": 0.00013594054698632315, "loss": 1.4547, "step": 11211 }, { "epoch": 0.4015255967196089, "grad_norm": 1.6919299364089966, "learning_rate": 0.00013592972276031394, "loss": 1.6643, "step": 11212 }, { "epoch": 0.40156140884917724, "grad_norm": 1.8728044033050537, "learning_rate": 0.00013591889805091663, "loss": 1.6113, "step": 11213 }, { "epoch": 0.4015972209787455, "grad_norm": 2.333636999130249, "learning_rate": 0.00013590807285827688, "loss": 1.7807, "step": 11214 }, { "epoch": 0.4016330331083138, "grad_norm": 1.5089396238327026, "learning_rate": 0.00013589724718254036, "loss": 1.212, "step": 11215 }, { "epoch": 0.40166884523788204, "grad_norm": 1.860792636871338, "learning_rate": 0.00013588642102385266, "loss": 1.782, "step": 11216 }, { "epoch": 0.40170465736745037, "grad_norm": 1.6318405866622925, "learning_rate": 0.00013587559438235945, "loss": 1.5339, "step": 11217 }, { "epoch": 0.40174046949701864, "grad_norm": 1.822420358657837, "learning_rate": 0.0001358647672582064, "loss": 1.453, "step": 11218 }, { "epoch": 0.4017762816265869, "grad_norm": 1.4181208610534668, "learning_rate": 0.00013585393965153916, "loss": 1.3009, "step": 11219 }, { "epoch": 0.40181209375615523, "grad_norm": 1.3538849353790283, "learning_rate": 0.00013584311156250342, "loss": 1.6589, "step": 11220 }, { "epoch": 0.4018479058857235, "grad_norm": 1.5220893621444702, "learning_rate": 0.00013583228299124484, "loss": 1.7311, "step": 11221 }, { "epoch": 0.40188371801529177, "grad_norm": 1.5974732637405396, "learning_rate": 0.00013582145393790913, "loss": 1.4992, "step": 11222 }, { "epoch": 0.40191953014486004, "grad_norm": 2.0485281944274902, "learning_rate": 0.00013581062440264194, "loss": 1.4456, "step": 11223 }, { "epoch": 0.40195534227442836, "grad_norm": 1.678962230682373, "learning_rate": 0.000135799794385589, "loss": 1.5724, "step": 11224 }, { "epoch": 0.40199115440399663, "grad_norm": 1.9616618156433105, "learning_rate": 0.00013578896388689602, "loss": 1.5544, "step": 11225 }, { "epoch": 0.4020269665335649, "grad_norm": 1.4487061500549316, "learning_rate": 0.00013577813290670867, "loss": 1.467, "step": 11226 }, { "epoch": 0.4020627786631332, "grad_norm": 2.1156294345855713, "learning_rate": 0.00013576730144517271, "loss": 1.8479, "step": 11227 }, { "epoch": 0.4020985907927015, "grad_norm": 1.5825573205947876, "learning_rate": 0.00013575646950243384, "loss": 1.7531, "step": 11228 }, { "epoch": 0.40213440292226976, "grad_norm": 2.3100428581237793, "learning_rate": 0.0001357456370786378, "loss": 1.467, "step": 11229 }, { "epoch": 0.40217021505183803, "grad_norm": 1.4864134788513184, "learning_rate": 0.0001357348041739303, "loss": 1.5086, "step": 11230 }, { "epoch": 0.40220602718140636, "grad_norm": 1.748326301574707, "learning_rate": 0.00013572397078845716, "loss": 1.3973, "step": 11231 }, { "epoch": 0.4022418393109746, "grad_norm": 1.4479734897613525, "learning_rate": 0.00013571313692236405, "loss": 1.4109, "step": 11232 }, { "epoch": 0.4022776514405429, "grad_norm": 1.8514389991760254, "learning_rate": 0.0001357023025757967, "loss": 1.5098, "step": 11233 }, { "epoch": 0.4023134635701112, "grad_norm": 1.9194141626358032, "learning_rate": 0.00013569146774890099, "loss": 1.6107, "step": 11234 }, { "epoch": 0.4023492756996795, "grad_norm": 1.9210814237594604, "learning_rate": 0.00013568063244182257, "loss": 1.5859, "step": 11235 }, { "epoch": 0.40238508782924776, "grad_norm": 1.5344477891921997, "learning_rate": 0.00013566979665470728, "loss": 1.4888, "step": 11236 }, { "epoch": 0.402420899958816, "grad_norm": 2.001387596130371, "learning_rate": 0.0001356589603877009, "loss": 1.5566, "step": 11237 }, { "epoch": 0.40245671208838435, "grad_norm": 1.8244560956954956, "learning_rate": 0.0001356481236409492, "loss": 1.2914, "step": 11238 }, { "epoch": 0.4024925242179526, "grad_norm": 1.440247654914856, "learning_rate": 0.00013563728641459793, "loss": 1.5425, "step": 11239 }, { "epoch": 0.4025283363475209, "grad_norm": 1.331313133239746, "learning_rate": 0.000135626448708793, "loss": 1.5918, "step": 11240 }, { "epoch": 0.4025641484770892, "grad_norm": 1.4485142230987549, "learning_rate": 0.00013561561052368015, "loss": 1.5563, "step": 11241 }, { "epoch": 0.4025999606066575, "grad_norm": 1.3569890260696411, "learning_rate": 0.0001356047718594052, "loss": 1.6618, "step": 11242 }, { "epoch": 0.40263577273622575, "grad_norm": 1.8978397846221924, "learning_rate": 0.00013559393271611397, "loss": 1.7617, "step": 11243 }, { "epoch": 0.402671584865794, "grad_norm": 1.6500682830810547, "learning_rate": 0.00013558309309395224, "loss": 1.1864, "step": 11244 }, { "epoch": 0.40270739699536234, "grad_norm": 1.9103257656097412, "learning_rate": 0.00013557225299306599, "loss": 1.4868, "step": 11245 }, { "epoch": 0.4027432091249306, "grad_norm": 1.6831821203231812, "learning_rate": 0.00013556141241360088, "loss": 1.8324, "step": 11246 }, { "epoch": 0.4027790212544989, "grad_norm": 1.6163675785064697, "learning_rate": 0.00013555057135570286, "loss": 1.4925, "step": 11247 }, { "epoch": 0.4028148333840672, "grad_norm": 1.781957983970642, "learning_rate": 0.00013553972981951776, "loss": 1.3588, "step": 11248 }, { "epoch": 0.4028506455136355, "grad_norm": 1.2353285551071167, "learning_rate": 0.00013552888780519144, "loss": 1.4921, "step": 11249 }, { "epoch": 0.40288645764320374, "grad_norm": 1.3386553525924683, "learning_rate": 0.00013551804531286975, "loss": 1.6313, "step": 11250 }, { "epoch": 0.402922269772772, "grad_norm": 1.669419288635254, "learning_rate": 0.0001355072023426986, "loss": 1.432, "step": 11251 }, { "epoch": 0.40295808190234034, "grad_norm": 2.4253623485565186, "learning_rate": 0.00013549635889482383, "loss": 1.4862, "step": 11252 }, { "epoch": 0.4029938940319086, "grad_norm": 2.272901773452759, "learning_rate": 0.00013548551496939132, "loss": 1.4508, "step": 11253 }, { "epoch": 0.4030297061614769, "grad_norm": 1.535967230796814, "learning_rate": 0.00013547467056654702, "loss": 1.576, "step": 11254 }, { "epoch": 0.4030655182910452, "grad_norm": 1.4173799753189087, "learning_rate": 0.00013546382568643676, "loss": 1.7135, "step": 11255 }, { "epoch": 0.40310133042061347, "grad_norm": 1.3089243173599243, "learning_rate": 0.00013545298032920647, "loss": 1.4986, "step": 11256 }, { "epoch": 0.40313714255018174, "grad_norm": 1.5522119998931885, "learning_rate": 0.00013544213449500204, "loss": 1.5136, "step": 11257 }, { "epoch": 0.40317295467975, "grad_norm": 1.4867783784866333, "learning_rate": 0.00013543128818396946, "loss": 1.4835, "step": 11258 }, { "epoch": 0.40320876680931833, "grad_norm": 1.8949105739593506, "learning_rate": 0.0001354204413962546, "loss": 1.5772, "step": 11259 }, { "epoch": 0.4032445789388866, "grad_norm": 2.0525848865509033, "learning_rate": 0.00013540959413200335, "loss": 1.4172, "step": 11260 }, { "epoch": 0.40328039106845487, "grad_norm": 1.6664822101593018, "learning_rate": 0.0001353987463913617, "loss": 1.6019, "step": 11261 }, { "epoch": 0.4033162031980232, "grad_norm": 2.6384687423706055, "learning_rate": 0.0001353878981744756, "loss": 1.633, "step": 11262 }, { "epoch": 0.40335201532759146, "grad_norm": 1.2509064674377441, "learning_rate": 0.00013537704948149093, "loss": 1.5135, "step": 11263 }, { "epoch": 0.40338782745715973, "grad_norm": 2.389633893966675, "learning_rate": 0.00013536620031255373, "loss": 1.7426, "step": 11264 }, { "epoch": 0.403423639586728, "grad_norm": 2.0430221557617188, "learning_rate": 0.0001353553506678099, "loss": 1.5984, "step": 11265 }, { "epoch": 0.4034594517162963, "grad_norm": 1.7450594902038574, "learning_rate": 0.00013534450054740544, "loss": 1.5399, "step": 11266 }, { "epoch": 0.4034952638458646, "grad_norm": 1.6870568990707397, "learning_rate": 0.0001353336499514863, "loss": 1.4738, "step": 11267 }, { "epoch": 0.40353107597543286, "grad_norm": 1.8001635074615479, "learning_rate": 0.00013532279888019851, "loss": 1.2668, "step": 11268 }, { "epoch": 0.4035668881050012, "grad_norm": 2.56563663482666, "learning_rate": 0.00013531194733368805, "loss": 1.8747, "step": 11269 }, { "epoch": 0.40360270023456946, "grad_norm": 1.735708236694336, "learning_rate": 0.00013530109531210082, "loss": 1.5633, "step": 11270 }, { "epoch": 0.4036385123641377, "grad_norm": 1.9577866792678833, "learning_rate": 0.00013529024281558292, "loss": 1.4575, "step": 11271 }, { "epoch": 0.403674324493706, "grad_norm": 2.2287938594818115, "learning_rate": 0.00013527938984428031, "loss": 1.6088, "step": 11272 }, { "epoch": 0.4037101366232743, "grad_norm": 1.7325700521469116, "learning_rate": 0.00013526853639833904, "loss": 1.7316, "step": 11273 }, { "epoch": 0.4037459487528426, "grad_norm": 1.6742218732833862, "learning_rate": 0.0001352576824779051, "loss": 1.3724, "step": 11274 }, { "epoch": 0.40378176088241086, "grad_norm": 1.6109325885772705, "learning_rate": 0.0001352468280831245, "loss": 1.2103, "step": 11275 }, { "epoch": 0.4038175730119792, "grad_norm": 1.916214108467102, "learning_rate": 0.00013523597321414332, "loss": 1.5114, "step": 11276 }, { "epoch": 0.40385338514154745, "grad_norm": 2.142789840698242, "learning_rate": 0.00013522511787110756, "loss": 1.6882, "step": 11277 }, { "epoch": 0.4038891972711157, "grad_norm": 1.7598340511322021, "learning_rate": 0.00013521426205416326, "loss": 1.49, "step": 11278 }, { "epoch": 0.403925009400684, "grad_norm": 1.6194044351577759, "learning_rate": 0.00013520340576345653, "loss": 1.6978, "step": 11279 }, { "epoch": 0.4039608215302523, "grad_norm": 1.4875582456588745, "learning_rate": 0.00013519254899913333, "loss": 1.7141, "step": 11280 }, { "epoch": 0.4039966336598206, "grad_norm": 1.9088953733444214, "learning_rate": 0.0001351816917613398, "loss": 1.4717, "step": 11281 }, { "epoch": 0.40403244578938885, "grad_norm": 1.5134812593460083, "learning_rate": 0.00013517083405022203, "loss": 1.6138, "step": 11282 }, { "epoch": 0.4040682579189572, "grad_norm": 2.6607894897460938, "learning_rate": 0.000135159975865926, "loss": 1.6279, "step": 11283 }, { "epoch": 0.40410407004852544, "grad_norm": 2.2413384914398193, "learning_rate": 0.00013514911720859785, "loss": 1.6775, "step": 11284 }, { "epoch": 0.4041398821780937, "grad_norm": 1.3868253231048584, "learning_rate": 0.00013513825807838373, "loss": 1.425, "step": 11285 }, { "epoch": 0.404175694307662, "grad_norm": 1.4734158515930176, "learning_rate": 0.0001351273984754296, "loss": 1.4511, "step": 11286 }, { "epoch": 0.4042115064372303, "grad_norm": 1.6310186386108398, "learning_rate": 0.00013511653839988168, "loss": 1.5508, "step": 11287 }, { "epoch": 0.4042473185667986, "grad_norm": 2.4635744094848633, "learning_rate": 0.000135105677851886, "loss": 1.4342, "step": 11288 }, { "epoch": 0.40428313069636684, "grad_norm": 1.7325843572616577, "learning_rate": 0.00013509481683158874, "loss": 1.5283, "step": 11289 }, { "epoch": 0.40431894282593517, "grad_norm": 1.6843584775924683, "learning_rate": 0.00013508395533913593, "loss": 1.4873, "step": 11290 }, { "epoch": 0.40435475495550344, "grad_norm": 1.6397114992141724, "learning_rate": 0.0001350730933746738, "loss": 1.3648, "step": 11291 }, { "epoch": 0.4043905670850717, "grad_norm": 1.6449841260910034, "learning_rate": 0.00013506223093834844, "loss": 1.5678, "step": 11292 }, { "epoch": 0.40442637921464, "grad_norm": 1.9263081550598145, "learning_rate": 0.000135051368030306, "loss": 1.6234, "step": 11293 }, { "epoch": 0.4044621913442083, "grad_norm": 1.5096728801727295, "learning_rate": 0.00013504050465069263, "loss": 1.7048, "step": 11294 }, { "epoch": 0.40449800347377657, "grad_norm": 1.5539870262145996, "learning_rate": 0.0001350296407996544, "loss": 1.4478, "step": 11295 }, { "epoch": 0.40453381560334484, "grad_norm": 1.7265819311141968, "learning_rate": 0.0001350187764773376, "loss": 1.5398, "step": 11296 }, { "epoch": 0.40456962773291316, "grad_norm": 1.5002291202545166, "learning_rate": 0.0001350079116838883, "loss": 1.5066, "step": 11297 }, { "epoch": 0.40460543986248143, "grad_norm": 1.3665101528167725, "learning_rate": 0.0001349970464194527, "loss": 1.2355, "step": 11298 }, { "epoch": 0.4046412519920497, "grad_norm": 1.7081962823867798, "learning_rate": 0.000134986180684177, "loss": 1.4824, "step": 11299 }, { "epoch": 0.40467706412161797, "grad_norm": 1.3695849180221558, "learning_rate": 0.0001349753144782074, "loss": 1.5157, "step": 11300 }, { "epoch": 0.4047128762511863, "grad_norm": 2.080824613571167, "learning_rate": 0.00013496444780169, "loss": 1.5212, "step": 11301 }, { "epoch": 0.40474868838075456, "grad_norm": 1.3417965173721313, "learning_rate": 0.0001349535806547711, "loss": 1.2956, "step": 11302 }, { "epoch": 0.40478450051032283, "grad_norm": 1.4742838144302368, "learning_rate": 0.00013494271303759686, "loss": 1.7153, "step": 11303 }, { "epoch": 0.40482031263989116, "grad_norm": 1.4015898704528809, "learning_rate": 0.0001349318449503135, "loss": 1.3194, "step": 11304 }, { "epoch": 0.4048561247694594, "grad_norm": 1.504159688949585, "learning_rate": 0.00013492097639306716, "loss": 1.5072, "step": 11305 }, { "epoch": 0.4048919368990277, "grad_norm": 1.7774419784545898, "learning_rate": 0.00013491010736600418, "loss": 1.7561, "step": 11306 }, { "epoch": 0.40492774902859596, "grad_norm": 1.551797866821289, "learning_rate": 0.0001348992378692707, "loss": 1.5001, "step": 11307 }, { "epoch": 0.4049635611581643, "grad_norm": 2.107363224029541, "learning_rate": 0.000134888367903013, "loss": 1.3526, "step": 11308 }, { "epoch": 0.40499937328773256, "grad_norm": 3.087078332901001, "learning_rate": 0.00013487749746737734, "loss": 1.4878, "step": 11309 }, { "epoch": 0.4050351854173008, "grad_norm": 1.5227560997009277, "learning_rate": 0.0001348666265625099, "loss": 1.4858, "step": 11310 }, { "epoch": 0.40507099754686915, "grad_norm": 1.4322707653045654, "learning_rate": 0.00013485575518855703, "loss": 1.4656, "step": 11311 }, { "epoch": 0.4051068096764374, "grad_norm": 2.5870959758758545, "learning_rate": 0.00013484488334566488, "loss": 1.466, "step": 11312 }, { "epoch": 0.4051426218060057, "grad_norm": 1.6336684226989746, "learning_rate": 0.00013483401103397982, "loss": 1.5328, "step": 11313 }, { "epoch": 0.40517843393557396, "grad_norm": 1.972943902015686, "learning_rate": 0.00013482313825364804, "loss": 1.3578, "step": 11314 }, { "epoch": 0.4052142460651423, "grad_norm": 2.440329074859619, "learning_rate": 0.00013481226500481588, "loss": 1.7942, "step": 11315 }, { "epoch": 0.40525005819471055, "grad_norm": 1.815645694732666, "learning_rate": 0.00013480139128762956, "loss": 1.5568, "step": 11316 }, { "epoch": 0.4052858703242788, "grad_norm": 1.4573594331741333, "learning_rate": 0.00013479051710223544, "loss": 1.5361, "step": 11317 }, { "epoch": 0.40532168245384714, "grad_norm": 1.3116105794906616, "learning_rate": 0.00013477964244877977, "loss": 1.5229, "step": 11318 }, { "epoch": 0.4053574945834154, "grad_norm": 1.4079471826553345, "learning_rate": 0.0001347687673274089, "loss": 1.2103, "step": 11319 }, { "epoch": 0.4053933067129837, "grad_norm": 1.2982499599456787, "learning_rate": 0.00013475789173826908, "loss": 1.5497, "step": 11320 }, { "epoch": 0.40542911884255195, "grad_norm": 1.6635154485702515, "learning_rate": 0.0001347470156815067, "loss": 1.5242, "step": 11321 }, { "epoch": 0.4054649309721203, "grad_norm": 1.7815632820129395, "learning_rate": 0.000134736139157268, "loss": 1.6833, "step": 11322 }, { "epoch": 0.40550074310168854, "grad_norm": 1.7860198020935059, "learning_rate": 0.0001347252621656994, "loss": 1.5752, "step": 11323 }, { "epoch": 0.4055365552312568, "grad_norm": 1.5376157760620117, "learning_rate": 0.00013471438470694715, "loss": 1.5881, "step": 11324 }, { "epoch": 0.40557236736082514, "grad_norm": 2.517604112625122, "learning_rate": 0.00013470350678115763, "loss": 1.3554, "step": 11325 }, { "epoch": 0.4056081794903934, "grad_norm": 1.7985392808914185, "learning_rate": 0.00013469262838847724, "loss": 1.3522, "step": 11326 }, { "epoch": 0.4056439916199617, "grad_norm": 1.6951795816421509, "learning_rate": 0.00013468174952905223, "loss": 1.3, "step": 11327 }, { "epoch": 0.40567980374952994, "grad_norm": 1.8560683727264404, "learning_rate": 0.00013467087020302906, "loss": 1.4333, "step": 11328 }, { "epoch": 0.40571561587909827, "grad_norm": 1.9412565231323242, "learning_rate": 0.00013465999041055405, "loss": 1.4037, "step": 11329 }, { "epoch": 0.40575142800866654, "grad_norm": 2.763273000717163, "learning_rate": 0.00013464911015177356, "loss": 1.3453, "step": 11330 }, { "epoch": 0.4057872401382348, "grad_norm": 1.9450551271438599, "learning_rate": 0.000134638229426834, "loss": 1.6067, "step": 11331 }, { "epoch": 0.40582305226780313, "grad_norm": 1.551149606704712, "learning_rate": 0.0001346273482358817, "loss": 1.5155, "step": 11332 }, { "epoch": 0.4058588643973714, "grad_norm": 1.4368470907211304, "learning_rate": 0.00013461646657906315, "loss": 1.556, "step": 11333 }, { "epoch": 0.40589467652693967, "grad_norm": 2.317528486251831, "learning_rate": 0.00013460558445652467, "loss": 1.9397, "step": 11334 }, { "epoch": 0.40593048865650794, "grad_norm": 1.6196165084838867, "learning_rate": 0.0001345947018684127, "loss": 1.6216, "step": 11335 }, { "epoch": 0.40596630078607626, "grad_norm": 1.3833155632019043, "learning_rate": 0.00013458381881487362, "loss": 1.4274, "step": 11336 }, { "epoch": 0.40600211291564453, "grad_norm": 2.2270424365997314, "learning_rate": 0.0001345729352960539, "loss": 1.3553, "step": 11337 }, { "epoch": 0.4060379250452128, "grad_norm": 1.5133336782455444, "learning_rate": 0.00013456205131209988, "loss": 1.5309, "step": 11338 }, { "epoch": 0.4060737371747811, "grad_norm": 1.5618396997451782, "learning_rate": 0.0001345511668631581, "loss": 1.411, "step": 11339 }, { "epoch": 0.4061095493043494, "grad_norm": 1.4137815237045288, "learning_rate": 0.0001345402819493749, "loss": 1.677, "step": 11340 }, { "epoch": 0.40614536143391766, "grad_norm": 1.4480739831924438, "learning_rate": 0.00013452939657089677, "loss": 1.4027, "step": 11341 }, { "epoch": 0.40618117356348593, "grad_norm": 1.5944091081619263, "learning_rate": 0.00013451851072787013, "loss": 1.2003, "step": 11342 }, { "epoch": 0.40621698569305426, "grad_norm": 2.348677635192871, "learning_rate": 0.00013450762442044148, "loss": 1.3259, "step": 11343 }, { "epoch": 0.4062527978226225, "grad_norm": 1.4781785011291504, "learning_rate": 0.00013449673764875724, "loss": 1.4524, "step": 11344 }, { "epoch": 0.4062886099521908, "grad_norm": 1.2914907932281494, "learning_rate": 0.00013448585041296392, "loss": 1.4464, "step": 11345 }, { "epoch": 0.4063244220817591, "grad_norm": 1.4890108108520508, "learning_rate": 0.00013447496271320794, "loss": 1.5749, "step": 11346 }, { "epoch": 0.4063602342113274, "grad_norm": 1.6016771793365479, "learning_rate": 0.00013446407454963582, "loss": 1.6343, "step": 11347 }, { "epoch": 0.40639604634089566, "grad_norm": 1.856371521949768, "learning_rate": 0.00013445318592239405, "loss": 1.3749, "step": 11348 }, { "epoch": 0.4064318584704639, "grad_norm": 1.780447006225586, "learning_rate": 0.00013444229683162904, "loss": 1.5556, "step": 11349 }, { "epoch": 0.40646767060003225, "grad_norm": 2.996824026107788, "learning_rate": 0.00013443140727748738, "loss": 1.5899, "step": 11350 }, { "epoch": 0.4065034827296005, "grad_norm": 1.7526699304580688, "learning_rate": 0.0001344205172601156, "loss": 1.501, "step": 11351 }, { "epoch": 0.4065392948591688, "grad_norm": 3.1774349212646484, "learning_rate": 0.00013440962677966012, "loss": 1.3241, "step": 11352 }, { "epoch": 0.4065751069887371, "grad_norm": 1.467477798461914, "learning_rate": 0.0001343987358362675, "loss": 1.7437, "step": 11353 }, { "epoch": 0.4066109191183054, "grad_norm": 2.0850508213043213, "learning_rate": 0.00013438784443008426, "loss": 1.3627, "step": 11354 }, { "epoch": 0.40664673124787365, "grad_norm": 1.6988506317138672, "learning_rate": 0.00013437695256125694, "loss": 1.7532, "step": 11355 }, { "epoch": 0.4066825433774419, "grad_norm": 2.0369551181793213, "learning_rate": 0.00013436606022993207, "loss": 1.561, "step": 11356 }, { "epoch": 0.40671835550701024, "grad_norm": 1.7097434997558594, "learning_rate": 0.00013435516743625617, "loss": 1.8608, "step": 11357 }, { "epoch": 0.4067541676365785, "grad_norm": 1.62166166305542, "learning_rate": 0.0001343442741803758, "loss": 1.4686, "step": 11358 }, { "epoch": 0.4067899797661468, "grad_norm": 1.4330084323883057, "learning_rate": 0.00013433338046243753, "loss": 1.5757, "step": 11359 }, { "epoch": 0.4068257918957151, "grad_norm": 1.9137028455734253, "learning_rate": 0.0001343224862825879, "loss": 1.4864, "step": 11360 }, { "epoch": 0.4068616040252834, "grad_norm": 1.4365180730819702, "learning_rate": 0.00013431159164097354, "loss": 1.6628, "step": 11361 }, { "epoch": 0.40689741615485164, "grad_norm": 1.4980610609054565, "learning_rate": 0.0001343006965377409, "loss": 1.1436, "step": 11362 }, { "epoch": 0.4069332282844199, "grad_norm": 2.000791549682617, "learning_rate": 0.00013428980097303668, "loss": 1.6187, "step": 11363 }, { "epoch": 0.40696904041398824, "grad_norm": 1.9143515825271606, "learning_rate": 0.0001342789049470074, "loss": 1.6053, "step": 11364 }, { "epoch": 0.4070048525435565, "grad_norm": 1.4883040189743042, "learning_rate": 0.0001342680084597997, "loss": 1.3271, "step": 11365 }, { "epoch": 0.4070406646731248, "grad_norm": 1.6826872825622559, "learning_rate": 0.00013425711151156014, "loss": 1.7451, "step": 11366 }, { "epoch": 0.4070764768026931, "grad_norm": 1.5650379657745361, "learning_rate": 0.00013424621410243533, "loss": 1.5927, "step": 11367 }, { "epoch": 0.40711228893226137, "grad_norm": 2.28118634223938, "learning_rate": 0.00013423531623257189, "loss": 1.3887, "step": 11368 }, { "epoch": 0.40714810106182964, "grad_norm": 1.7174426317214966, "learning_rate": 0.0001342244179021164, "loss": 1.7299, "step": 11369 }, { "epoch": 0.4071839131913979, "grad_norm": 2.240532875061035, "learning_rate": 0.00013421351911121554, "loss": 1.5032, "step": 11370 }, { "epoch": 0.40721972532096623, "grad_norm": 1.6691335439682007, "learning_rate": 0.00013420261986001587, "loss": 1.6486, "step": 11371 }, { "epoch": 0.4072555374505345, "grad_norm": 2.2593131065368652, "learning_rate": 0.00013419172014866412, "loss": 1.4365, "step": 11372 }, { "epoch": 0.40729134958010277, "grad_norm": 2.0483601093292236, "learning_rate": 0.00013418081997730686, "loss": 1.6662, "step": 11373 }, { "epoch": 0.4073271617096711, "grad_norm": 1.5782848596572876, "learning_rate": 0.00013416991934609075, "loss": 1.5193, "step": 11374 }, { "epoch": 0.40736297383923936, "grad_norm": 1.8060847520828247, "learning_rate": 0.00013415901825516248, "loss": 1.5458, "step": 11375 }, { "epoch": 0.40739878596880763, "grad_norm": 1.808779001235962, "learning_rate": 0.00013414811670466864, "loss": 1.6425, "step": 11376 }, { "epoch": 0.4074345980983759, "grad_norm": 1.4207754135131836, "learning_rate": 0.00013413721469475597, "loss": 1.4269, "step": 11377 }, { "epoch": 0.4074704102279442, "grad_norm": 1.7335152626037598, "learning_rate": 0.00013412631222557112, "loss": 1.5142, "step": 11378 }, { "epoch": 0.4075062223575125, "grad_norm": 1.4673773050308228, "learning_rate": 0.00013411540929726072, "loss": 1.406, "step": 11379 }, { "epoch": 0.40754203448708076, "grad_norm": 1.2677370309829712, "learning_rate": 0.0001341045059099715, "loss": 1.6056, "step": 11380 }, { "epoch": 0.4075778466166491, "grad_norm": 2.6489250659942627, "learning_rate": 0.00013409360206385017, "loss": 1.6785, "step": 11381 }, { "epoch": 0.40761365874621736, "grad_norm": 1.6678003072738647, "learning_rate": 0.00013408269775904338, "loss": 1.64, "step": 11382 }, { "epoch": 0.4076494708757856, "grad_norm": 1.7335892915725708, "learning_rate": 0.00013407179299569787, "loss": 1.405, "step": 11383 }, { "epoch": 0.4076852830053539, "grad_norm": 1.8726739883422852, "learning_rate": 0.00013406088777396033, "loss": 1.3341, "step": 11384 }, { "epoch": 0.4077210951349222, "grad_norm": 1.368068814277649, "learning_rate": 0.00013404998209397748, "loss": 1.5909, "step": 11385 }, { "epoch": 0.4077569072644905, "grad_norm": 1.7381190061569214, "learning_rate": 0.00013403907595589605, "loss": 1.5986, "step": 11386 }, { "epoch": 0.40779271939405876, "grad_norm": 1.8664449453353882, "learning_rate": 0.0001340281693598627, "loss": 1.5861, "step": 11387 }, { "epoch": 0.407828531523627, "grad_norm": 1.7823396921157837, "learning_rate": 0.0001340172623060243, "loss": 1.3708, "step": 11388 }, { "epoch": 0.40786434365319535, "grad_norm": 2.0772182941436768, "learning_rate": 0.0001340063547945275, "loss": 1.5313, "step": 11389 }, { "epoch": 0.4079001557827636, "grad_norm": 1.9356260299682617, "learning_rate": 0.00013399544682551903, "loss": 1.5213, "step": 11390 }, { "epoch": 0.4079359679123319, "grad_norm": 1.6405465602874756, "learning_rate": 0.00013398453839914574, "loss": 1.6985, "step": 11391 }, { "epoch": 0.4079717800419002, "grad_norm": 2.4301764965057373, "learning_rate": 0.00013397362951555425, "loss": 1.3963, "step": 11392 }, { "epoch": 0.4080075921714685, "grad_norm": 1.3280025720596313, "learning_rate": 0.00013396272017489143, "loss": 1.4303, "step": 11393 }, { "epoch": 0.40804340430103675, "grad_norm": 2.0638201236724854, "learning_rate": 0.000133951810377304, "loss": 1.6184, "step": 11394 }, { "epoch": 0.408079216430605, "grad_norm": 1.1395586729049683, "learning_rate": 0.00013394090012293879, "loss": 1.673, "step": 11395 }, { "epoch": 0.40811502856017334, "grad_norm": 1.4466770887374878, "learning_rate": 0.0001339299894119425, "loss": 1.6637, "step": 11396 }, { "epoch": 0.4081508406897416, "grad_norm": 2.1084930896759033, "learning_rate": 0.00013391907824446202, "loss": 1.53, "step": 11397 }, { "epoch": 0.4081866528193099, "grad_norm": 1.475360631942749, "learning_rate": 0.00013390816662064406, "loss": 1.5123, "step": 11398 }, { "epoch": 0.4082224649488782, "grad_norm": 1.497314691543579, "learning_rate": 0.00013389725454063549, "loss": 1.4362, "step": 11399 }, { "epoch": 0.4082582770784465, "grad_norm": 2.5741963386535645, "learning_rate": 0.00013388634200458305, "loss": 1.6763, "step": 11400 }, { "epoch": 0.40829408920801474, "grad_norm": 2.4950594902038574, "learning_rate": 0.00013387542901263362, "loss": 1.5976, "step": 11401 }, { "epoch": 0.408329901337583, "grad_norm": 1.6021647453308105, "learning_rate": 0.00013386451556493396, "loss": 1.8064, "step": 11402 }, { "epoch": 0.40836571346715134, "grad_norm": 1.5093995332717896, "learning_rate": 0.00013385360166163094, "loss": 1.6469, "step": 11403 }, { "epoch": 0.4084015255967196, "grad_norm": 1.5861656665802002, "learning_rate": 0.00013384268730287136, "loss": 1.3995, "step": 11404 }, { "epoch": 0.4084373377262879, "grad_norm": 1.5600558519363403, "learning_rate": 0.0001338317724888021, "loss": 1.1743, "step": 11405 }, { "epoch": 0.4084731498558562, "grad_norm": 1.8158670663833618, "learning_rate": 0.00013382085721956997, "loss": 1.6269, "step": 11406 }, { "epoch": 0.40850896198542447, "grad_norm": 1.5254065990447998, "learning_rate": 0.00013380994149532181, "loss": 1.3899, "step": 11407 }, { "epoch": 0.40854477411499274, "grad_norm": 1.4239981174468994, "learning_rate": 0.00013379902531620455, "loss": 1.6413, "step": 11408 }, { "epoch": 0.408580586244561, "grad_norm": 2.2646539211273193, "learning_rate": 0.00013378810868236497, "loss": 1.7264, "step": 11409 }, { "epoch": 0.40861639837412933, "grad_norm": 1.6422070264816284, "learning_rate": 0.00013377719159394998, "loss": 1.5903, "step": 11410 }, { "epoch": 0.4086522105036976, "grad_norm": 1.7455012798309326, "learning_rate": 0.00013376627405110644, "loss": 1.4329, "step": 11411 }, { "epoch": 0.40868802263326587, "grad_norm": 1.596226453781128, "learning_rate": 0.00013375535605398127, "loss": 1.5022, "step": 11412 }, { "epoch": 0.4087238347628342, "grad_norm": 1.9514662027359009, "learning_rate": 0.00013374443760272127, "loss": 1.3195, "step": 11413 }, { "epoch": 0.40875964689240246, "grad_norm": 1.9477596282958984, "learning_rate": 0.0001337335186974734, "loss": 1.605, "step": 11414 }, { "epoch": 0.40879545902197073, "grad_norm": 1.2788459062576294, "learning_rate": 0.00013372259933838458, "loss": 1.4258, "step": 11415 }, { "epoch": 0.408831271151539, "grad_norm": 2.610154628753662, "learning_rate": 0.00013371167952560168, "loss": 1.2783, "step": 11416 }, { "epoch": 0.4088670832811073, "grad_norm": 2.161350727081299, "learning_rate": 0.00013370075925927158, "loss": 1.4277, "step": 11417 }, { "epoch": 0.4089028954106756, "grad_norm": 1.61553156375885, "learning_rate": 0.00013368983853954126, "loss": 1.5061, "step": 11418 }, { "epoch": 0.40893870754024386, "grad_norm": 1.382102370262146, "learning_rate": 0.00013367891736655764, "loss": 1.4812, "step": 11419 }, { "epoch": 0.4089745196698122, "grad_norm": 1.7254366874694824, "learning_rate": 0.0001336679957404676, "loss": 1.6011, "step": 11420 }, { "epoch": 0.40901033179938046, "grad_norm": 1.3416959047317505, "learning_rate": 0.00013365707366141814, "loss": 1.6771, "step": 11421 }, { "epoch": 0.4090461439289487, "grad_norm": 1.6707831621170044, "learning_rate": 0.00013364615112955612, "loss": 1.5072, "step": 11422 }, { "epoch": 0.409081956058517, "grad_norm": 2.198908567428589, "learning_rate": 0.0001336352281450286, "loss": 1.7351, "step": 11423 }, { "epoch": 0.4091177681880853, "grad_norm": 1.4816632270812988, "learning_rate": 0.0001336243047079824, "loss": 1.5977, "step": 11424 }, { "epoch": 0.4091535803176536, "grad_norm": 1.7870755195617676, "learning_rate": 0.00013361338081856457, "loss": 1.6438, "step": 11425 }, { "epoch": 0.40918939244722186, "grad_norm": 1.7458114624023438, "learning_rate": 0.0001336024564769221, "loss": 1.4006, "step": 11426 }, { "epoch": 0.4092252045767902, "grad_norm": 1.7427453994750977, "learning_rate": 0.00013359153168320188, "loss": 1.3585, "step": 11427 }, { "epoch": 0.40926101670635845, "grad_norm": 1.8659039735794067, "learning_rate": 0.00013358060643755098, "loss": 1.67, "step": 11428 }, { "epoch": 0.4092968288359267, "grad_norm": 1.4073402881622314, "learning_rate": 0.00013356968074011626, "loss": 1.4902, "step": 11429 }, { "epoch": 0.409332640965495, "grad_norm": 1.6928961277008057, "learning_rate": 0.00013355875459104485, "loss": 1.6261, "step": 11430 }, { "epoch": 0.4093684530950633, "grad_norm": 1.7611867189407349, "learning_rate": 0.00013354782799048366, "loss": 1.3424, "step": 11431 }, { "epoch": 0.4094042652246316, "grad_norm": 1.9710921049118042, "learning_rate": 0.00013353690093857972, "loss": 1.4281, "step": 11432 }, { "epoch": 0.40944007735419985, "grad_norm": 1.504773497581482, "learning_rate": 0.00013352597343548004, "loss": 1.4062, "step": 11433 }, { "epoch": 0.4094758894837682, "grad_norm": 1.5385361909866333, "learning_rate": 0.00013351504548133166, "loss": 1.7332, "step": 11434 }, { "epoch": 0.40951170161333644, "grad_norm": 2.2584147453308105, "learning_rate": 0.00013350411707628153, "loss": 1.75, "step": 11435 }, { "epoch": 0.4095475137429047, "grad_norm": 1.3944424390792847, "learning_rate": 0.00013349318822047674, "loss": 1.35, "step": 11436 }, { "epoch": 0.409583325872473, "grad_norm": 1.3100533485412598, "learning_rate": 0.00013348225891406432, "loss": 1.429, "step": 11437 }, { "epoch": 0.4096191380020413, "grad_norm": 2.255528688430786, "learning_rate": 0.00013347132915719127, "loss": 1.749, "step": 11438 }, { "epoch": 0.4096549501316096, "grad_norm": 2.328932523727417, "learning_rate": 0.0001334603989500047, "loss": 1.3934, "step": 11439 }, { "epoch": 0.40969076226117784, "grad_norm": 1.5605809688568115, "learning_rate": 0.00013344946829265157, "loss": 1.5377, "step": 11440 }, { "epoch": 0.40972657439074617, "grad_norm": 2.7059600353240967, "learning_rate": 0.000133438537185279, "loss": 1.8359, "step": 11441 }, { "epoch": 0.40976238652031444, "grad_norm": 1.4177329540252686, "learning_rate": 0.00013342760562803406, "loss": 1.5699, "step": 11442 }, { "epoch": 0.4097981986498827, "grad_norm": 2.09751296043396, "learning_rate": 0.0001334166736210638, "loss": 1.5161, "step": 11443 }, { "epoch": 0.409834010779451, "grad_norm": 1.62708580493927, "learning_rate": 0.00013340574116451533, "loss": 1.3609, "step": 11444 }, { "epoch": 0.4098698229090193, "grad_norm": 1.794075608253479, "learning_rate": 0.0001333948082585357, "loss": 1.4662, "step": 11445 }, { "epoch": 0.40990563503858757, "grad_norm": 1.4780937433242798, "learning_rate": 0.00013338387490327195, "loss": 1.3648, "step": 11446 }, { "epoch": 0.40994144716815584, "grad_norm": 1.2816733121871948, "learning_rate": 0.00013337294109887123, "loss": 1.3649, "step": 11447 }, { "epoch": 0.40997725929772416, "grad_norm": 1.4756011962890625, "learning_rate": 0.0001333620068454807, "loss": 1.4781, "step": 11448 }, { "epoch": 0.41001307142729243, "grad_norm": 1.638892412185669, "learning_rate": 0.00013335107214324733, "loss": 1.7207, "step": 11449 }, { "epoch": 0.4100488835568607, "grad_norm": 1.8446863889694214, "learning_rate": 0.00013334013699231836, "loss": 1.3898, "step": 11450 }, { "epoch": 0.41008469568642897, "grad_norm": 1.9317458868026733, "learning_rate": 0.0001333292013928408, "loss": 1.6734, "step": 11451 }, { "epoch": 0.4101205078159973, "grad_norm": 1.8174721002578735, "learning_rate": 0.00013331826534496188, "loss": 1.6215, "step": 11452 }, { "epoch": 0.41015631994556556, "grad_norm": 1.832736849784851, "learning_rate": 0.00013330732884882866, "loss": 1.4646, "step": 11453 }, { "epoch": 0.41019213207513383, "grad_norm": 2.605854034423828, "learning_rate": 0.0001332963919045883, "loss": 1.5914, "step": 11454 }, { "epoch": 0.41022794420470216, "grad_norm": 2.3421616554260254, "learning_rate": 0.00013328545451238793, "loss": 1.5082, "step": 11455 }, { "epoch": 0.4102637563342704, "grad_norm": 1.8743304014205933, "learning_rate": 0.00013327451667237468, "loss": 1.3477, "step": 11456 }, { "epoch": 0.4102995684638387, "grad_norm": 1.307234764099121, "learning_rate": 0.00013326357838469574, "loss": 1.5954, "step": 11457 }, { "epoch": 0.41033538059340696, "grad_norm": 1.5379959344863892, "learning_rate": 0.0001332526396494983, "loss": 1.4679, "step": 11458 }, { "epoch": 0.4103711927229753, "grad_norm": 1.7482596635818481, "learning_rate": 0.00013324170046692942, "loss": 1.5744, "step": 11459 }, { "epoch": 0.41040700485254356, "grad_norm": 1.8453181982040405, "learning_rate": 0.00013323076083713637, "loss": 1.4618, "step": 11460 }, { "epoch": 0.4104428169821118, "grad_norm": 2.0559778213500977, "learning_rate": 0.00013321982076026632, "loss": 1.632, "step": 11461 }, { "epoch": 0.41047862911168015, "grad_norm": 1.8249529600143433, "learning_rate": 0.0001332088802364664, "loss": 1.6765, "step": 11462 }, { "epoch": 0.4105144412412484, "grad_norm": 1.922978162765503, "learning_rate": 0.00013319793926588387, "loss": 1.1515, "step": 11463 }, { "epoch": 0.4105502533708167, "grad_norm": 1.7666670083999634, "learning_rate": 0.00013318699784866585, "loss": 1.6148, "step": 11464 }, { "epoch": 0.41058606550038496, "grad_norm": 1.6551225185394287, "learning_rate": 0.0001331760559849596, "loss": 1.2441, "step": 11465 }, { "epoch": 0.4106218776299533, "grad_norm": 1.6817725896835327, "learning_rate": 0.0001331651136749123, "loss": 1.3188, "step": 11466 }, { "epoch": 0.41065768975952155, "grad_norm": 1.4896317720413208, "learning_rate": 0.0001331541709186712, "loss": 1.3486, "step": 11467 }, { "epoch": 0.4106935018890898, "grad_norm": 1.92238450050354, "learning_rate": 0.00013314322771638346, "loss": 1.8901, "step": 11468 }, { "epoch": 0.41072931401865814, "grad_norm": 1.4980189800262451, "learning_rate": 0.00013313228406819637, "loss": 1.5268, "step": 11469 }, { "epoch": 0.4107651261482264, "grad_norm": 1.5852466821670532, "learning_rate": 0.00013312133997425712, "loss": 1.697, "step": 11470 }, { "epoch": 0.4108009382777947, "grad_norm": 1.6819641590118408, "learning_rate": 0.00013311039543471297, "loss": 1.4556, "step": 11471 }, { "epoch": 0.41083675040736295, "grad_norm": 1.6359261274337769, "learning_rate": 0.00013309945044971116, "loss": 1.2987, "step": 11472 }, { "epoch": 0.4108725625369313, "grad_norm": 1.8399405479431152, "learning_rate": 0.00013308850501939892, "loss": 1.6601, "step": 11473 }, { "epoch": 0.41090837466649954, "grad_norm": 1.4478241205215454, "learning_rate": 0.00013307755914392357, "loss": 1.2634, "step": 11474 }, { "epoch": 0.4109441867960678, "grad_norm": 1.807931661605835, "learning_rate": 0.0001330666128234323, "loss": 1.7326, "step": 11475 }, { "epoch": 0.41097999892563614, "grad_norm": 1.5771223306655884, "learning_rate": 0.0001330556660580724, "loss": 1.25, "step": 11476 }, { "epoch": 0.4110158110552044, "grad_norm": 1.909822702407837, "learning_rate": 0.00013304471884799116, "loss": 1.7395, "step": 11477 }, { "epoch": 0.4110516231847727, "grad_norm": 1.6032434701919556, "learning_rate": 0.00013303377119333587, "loss": 1.6306, "step": 11478 }, { "epoch": 0.41108743531434094, "grad_norm": 1.5883413553237915, "learning_rate": 0.0001330228230942538, "loss": 1.2959, "step": 11479 }, { "epoch": 0.41112324744390927, "grad_norm": 1.9156928062438965, "learning_rate": 0.00013301187455089223, "loss": 1.936, "step": 11480 }, { "epoch": 0.41115905957347754, "grad_norm": 3.1075665950775146, "learning_rate": 0.00013300092556339847, "loss": 1.8545, "step": 11481 }, { "epoch": 0.4111948717030458, "grad_norm": 1.515472412109375, "learning_rate": 0.00013298997613191978, "loss": 1.5118, "step": 11482 }, { "epoch": 0.41123068383261413, "grad_norm": 1.5612744092941284, "learning_rate": 0.00013297902625660358, "loss": 1.3228, "step": 11483 }, { "epoch": 0.4112664959621824, "grad_norm": 2.1935417652130127, "learning_rate": 0.00013296807593759708, "loss": 1.5941, "step": 11484 }, { "epoch": 0.41130230809175067, "grad_norm": 1.5191771984100342, "learning_rate": 0.0001329571251750477, "loss": 1.5533, "step": 11485 }, { "epoch": 0.41133812022131894, "grad_norm": 1.5615873336791992, "learning_rate": 0.00013294617396910266, "loss": 1.7188, "step": 11486 }, { "epoch": 0.41137393235088726, "grad_norm": 1.4643330574035645, "learning_rate": 0.00013293522231990935, "loss": 1.582, "step": 11487 }, { "epoch": 0.41140974448045553, "grad_norm": 2.06705379486084, "learning_rate": 0.00013292427022761514, "loss": 1.5501, "step": 11488 }, { "epoch": 0.4114455566100238, "grad_norm": 1.5073274374008179, "learning_rate": 0.0001329133176923673, "loss": 1.5135, "step": 11489 }, { "epoch": 0.4114813687395921, "grad_norm": 1.912886381149292, "learning_rate": 0.00013290236471431326, "loss": 1.4496, "step": 11490 }, { "epoch": 0.4115171808691604, "grad_norm": 1.8361574411392212, "learning_rate": 0.00013289141129360033, "loss": 1.6251, "step": 11491 }, { "epoch": 0.41155299299872866, "grad_norm": 2.233924388885498, "learning_rate": 0.0001328804574303759, "loss": 1.8372, "step": 11492 }, { "epoch": 0.41158880512829693, "grad_norm": 1.716523289680481, "learning_rate": 0.0001328695031247873, "loss": 1.6532, "step": 11493 }, { "epoch": 0.41162461725786526, "grad_norm": 1.9167834520339966, "learning_rate": 0.00013285854837698195, "loss": 1.5691, "step": 11494 }, { "epoch": 0.4116604293874335, "grad_norm": 1.637646198272705, "learning_rate": 0.0001328475931871072, "loss": 1.5658, "step": 11495 }, { "epoch": 0.4116962415170018, "grad_norm": 1.4613744020462036, "learning_rate": 0.0001328366375553105, "loss": 1.2251, "step": 11496 }, { "epoch": 0.4117320536465701, "grad_norm": 2.3519818782806396, "learning_rate": 0.00013282568148173917, "loss": 1.4193, "step": 11497 }, { "epoch": 0.4117678657761384, "grad_norm": 1.911476731300354, "learning_rate": 0.00013281472496654064, "loss": 1.3368, "step": 11498 }, { "epoch": 0.41180367790570666, "grad_norm": 1.8679819107055664, "learning_rate": 0.0001328037680098623, "loss": 1.4186, "step": 11499 }, { "epoch": 0.4118394900352749, "grad_norm": 1.6194400787353516, "learning_rate": 0.00013279281061185158, "loss": 1.4609, "step": 11500 }, { "epoch": 0.41187530216484325, "grad_norm": 1.498268961906433, "learning_rate": 0.0001327818527726559, "loss": 1.4629, "step": 11501 }, { "epoch": 0.4119111142944115, "grad_norm": 1.735053300857544, "learning_rate": 0.00013277089449242267, "loss": 1.3692, "step": 11502 }, { "epoch": 0.4119469264239798, "grad_norm": 1.634087085723877, "learning_rate": 0.00013275993577129932, "loss": 1.7921, "step": 11503 }, { "epoch": 0.4119827385535481, "grad_norm": 2.1054511070251465, "learning_rate": 0.0001327489766094333, "loss": 1.3704, "step": 11504 }, { "epoch": 0.4120185506831164, "grad_norm": 1.6996413469314575, "learning_rate": 0.00013273801700697206, "loss": 1.7709, "step": 11505 }, { "epoch": 0.41205436281268465, "grad_norm": 1.709455966949463, "learning_rate": 0.00013272705696406302, "loss": 1.5758, "step": 11506 }, { "epoch": 0.4120901749422529, "grad_norm": 1.550470232963562, "learning_rate": 0.00013271609648085367, "loss": 1.5027, "step": 11507 }, { "epoch": 0.41212598707182124, "grad_norm": 1.6597813367843628, "learning_rate": 0.0001327051355574914, "loss": 1.7267, "step": 11508 }, { "epoch": 0.4121617992013895, "grad_norm": 1.2792216539382935, "learning_rate": 0.0001326941741941237, "loss": 1.3561, "step": 11509 }, { "epoch": 0.4121976113309578, "grad_norm": 1.414903163909912, "learning_rate": 0.00013268321239089809, "loss": 1.5848, "step": 11510 }, { "epoch": 0.4122334234605261, "grad_norm": 1.810530185699463, "learning_rate": 0.00013267225014796202, "loss": 1.4441, "step": 11511 }, { "epoch": 0.4122692355900944, "grad_norm": 1.5842233896255493, "learning_rate": 0.00013266128746546296, "loss": 1.1982, "step": 11512 }, { "epoch": 0.41230504771966264, "grad_norm": 1.3252923488616943, "learning_rate": 0.0001326503243435484, "loss": 1.4864, "step": 11513 }, { "epoch": 0.4123408598492309, "grad_norm": 1.435280203819275, "learning_rate": 0.00013263936078236586, "loss": 1.2947, "step": 11514 }, { "epoch": 0.41237667197879924, "grad_norm": 2.3701577186584473, "learning_rate": 0.00013262839678206283, "loss": 1.7331, "step": 11515 }, { "epoch": 0.4124124841083675, "grad_norm": 1.5976884365081787, "learning_rate": 0.00013261743234278678, "loss": 1.4953, "step": 11516 }, { "epoch": 0.4124482962379358, "grad_norm": 1.6659822463989258, "learning_rate": 0.00013260646746468527, "loss": 1.1783, "step": 11517 }, { "epoch": 0.4124841083675041, "grad_norm": 1.7905832529067993, "learning_rate": 0.0001325955021479058, "loss": 1.5895, "step": 11518 }, { "epoch": 0.41251992049707237, "grad_norm": 1.614429235458374, "learning_rate": 0.00013258453639259586, "loss": 1.3956, "step": 11519 }, { "epoch": 0.41255573262664064, "grad_norm": 1.761165738105774, "learning_rate": 0.00013257357019890307, "loss": 1.3419, "step": 11520 }, { "epoch": 0.4125915447562089, "grad_norm": 2.056187868118286, "learning_rate": 0.00013256260356697485, "loss": 1.4939, "step": 11521 }, { "epoch": 0.41262735688577723, "grad_norm": 1.6465550661087036, "learning_rate": 0.00013255163649695886, "loss": 1.6814, "step": 11522 }, { "epoch": 0.4126631690153455, "grad_norm": 2.2140185832977295, "learning_rate": 0.00013254066898900257, "loss": 1.5524, "step": 11523 }, { "epoch": 0.41269898114491377, "grad_norm": 1.8904930353164673, "learning_rate": 0.00013252970104325352, "loss": 1.4541, "step": 11524 }, { "epoch": 0.4127347932744821, "grad_norm": 1.2835383415222168, "learning_rate": 0.00013251873265985936, "loss": 1.469, "step": 11525 }, { "epoch": 0.41277060540405036, "grad_norm": 1.3030658960342407, "learning_rate": 0.00013250776383896752, "loss": 1.3568, "step": 11526 }, { "epoch": 0.41280641753361863, "grad_norm": 1.3432520627975464, "learning_rate": 0.00013249679458072572, "loss": 1.4884, "step": 11527 }, { "epoch": 0.4128422296631869, "grad_norm": 2.3544211387634277, "learning_rate": 0.00013248582488528142, "loss": 1.2597, "step": 11528 }, { "epoch": 0.4128780417927552, "grad_norm": 1.6606460809707642, "learning_rate": 0.0001324748547527823, "loss": 1.4726, "step": 11529 }, { "epoch": 0.4129138539223235, "grad_norm": 1.4075069427490234, "learning_rate": 0.00013246388418337586, "loss": 1.2732, "step": 11530 }, { "epoch": 0.41294966605189176, "grad_norm": 1.6073061227798462, "learning_rate": 0.00013245291317720974, "loss": 1.3229, "step": 11531 }, { "epoch": 0.4129854781814601, "grad_norm": 1.994727373123169, "learning_rate": 0.00013244194173443155, "loss": 1.3324, "step": 11532 }, { "epoch": 0.41302129031102836, "grad_norm": 1.824912190437317, "learning_rate": 0.00013243096985518887, "loss": 1.3809, "step": 11533 }, { "epoch": 0.4130571024405966, "grad_norm": 1.7901192903518677, "learning_rate": 0.00013241999753962932, "loss": 1.2446, "step": 11534 }, { "epoch": 0.4130929145701649, "grad_norm": 1.5490052700042725, "learning_rate": 0.00013240902478790052, "loss": 1.373, "step": 11535 }, { "epoch": 0.4131287266997332, "grad_norm": 1.581001877784729, "learning_rate": 0.0001323980516001501, "loss": 1.3801, "step": 11536 }, { "epoch": 0.4131645388293015, "grad_norm": 2.0129377841949463, "learning_rate": 0.00013238707797652569, "loss": 1.6714, "step": 11537 }, { "epoch": 0.41320035095886976, "grad_norm": 1.7160298824310303, "learning_rate": 0.0001323761039171749, "loss": 1.2577, "step": 11538 }, { "epoch": 0.4132361630884381, "grad_norm": 1.5854284763336182, "learning_rate": 0.00013236512942224545, "loss": 1.5169, "step": 11539 }, { "epoch": 0.41327197521800635, "grad_norm": 2.3109540939331055, "learning_rate": 0.0001323541544918849, "loss": 1.8822, "step": 11540 }, { "epoch": 0.4133077873475746, "grad_norm": 2.023212194442749, "learning_rate": 0.00013234317912624093, "loss": 1.5002, "step": 11541 }, { "epoch": 0.4133435994771429, "grad_norm": 2.173719644546509, "learning_rate": 0.0001323322033254612, "loss": 1.6894, "step": 11542 }, { "epoch": 0.4133794116067112, "grad_norm": 1.548795223236084, "learning_rate": 0.00013232122708969337, "loss": 1.3606, "step": 11543 }, { "epoch": 0.4134152237362795, "grad_norm": 1.749751329421997, "learning_rate": 0.00013231025041908514, "loss": 1.8365, "step": 11544 }, { "epoch": 0.41345103586584775, "grad_norm": 1.4382846355438232, "learning_rate": 0.00013229927331378418, "loss": 1.3604, "step": 11545 }, { "epoch": 0.4134868479954161, "grad_norm": 2.0954902172088623, "learning_rate": 0.0001322882957739381, "loss": 1.538, "step": 11546 }, { "epoch": 0.41352266012498434, "grad_norm": 1.5251587629318237, "learning_rate": 0.00013227731779969472, "loss": 1.2223, "step": 11547 }, { "epoch": 0.4135584722545526, "grad_norm": 2.0528721809387207, "learning_rate": 0.00013226633939120164, "loss": 1.2464, "step": 11548 }, { "epoch": 0.4135942843841209, "grad_norm": 1.6846539974212646, "learning_rate": 0.00013225536054860658, "loss": 1.3667, "step": 11549 }, { "epoch": 0.4136300965136892, "grad_norm": 2.08156418800354, "learning_rate": 0.00013224438127205725, "loss": 1.2521, "step": 11550 }, { "epoch": 0.4136659086432575, "grad_norm": 1.3722084760665894, "learning_rate": 0.0001322334015617014, "loss": 1.5203, "step": 11551 }, { "epoch": 0.41370172077282574, "grad_norm": 1.3709852695465088, "learning_rate": 0.00013222242141768664, "loss": 0.9214, "step": 11552 }, { "epoch": 0.41373753290239407, "grad_norm": 1.7349940538406372, "learning_rate": 0.00013221144084016082, "loss": 1.5187, "step": 11553 }, { "epoch": 0.41377334503196234, "grad_norm": 1.7656437158584595, "learning_rate": 0.00013220045982927157, "loss": 1.4667, "step": 11554 }, { "epoch": 0.4138091571615306, "grad_norm": 1.6982886791229248, "learning_rate": 0.00013218947838516672, "loss": 1.5724, "step": 11555 }, { "epoch": 0.4138449692910989, "grad_norm": 2.2578091621398926, "learning_rate": 0.0001321784965079939, "loss": 1.9695, "step": 11556 }, { "epoch": 0.4138807814206672, "grad_norm": 1.5725754499435425, "learning_rate": 0.00013216751419790096, "loss": 1.4167, "step": 11557 }, { "epoch": 0.41391659355023547, "grad_norm": 1.307800054550171, "learning_rate": 0.00013215653145503558, "loss": 1.4289, "step": 11558 }, { "epoch": 0.41395240567980374, "grad_norm": 1.5263270139694214, "learning_rate": 0.00013214554827954556, "loss": 1.4657, "step": 11559 }, { "epoch": 0.41398821780937206, "grad_norm": 1.9524672031402588, "learning_rate": 0.00013213456467157868, "loss": 1.4146, "step": 11560 }, { "epoch": 0.41402402993894033, "grad_norm": 1.5733195543289185, "learning_rate": 0.00013212358063128266, "loss": 1.2555, "step": 11561 }, { "epoch": 0.4140598420685086, "grad_norm": 1.4676419496536255, "learning_rate": 0.0001321125961588053, "loss": 1.4726, "step": 11562 }, { "epoch": 0.41409565419807687, "grad_norm": 2.0057151317596436, "learning_rate": 0.00013210161125429436, "loss": 1.6748, "step": 11563 }, { "epoch": 0.4141314663276452, "grad_norm": 1.7160898447036743, "learning_rate": 0.0001320906259178977, "loss": 1.6463, "step": 11564 }, { "epoch": 0.41416727845721346, "grad_norm": 1.602569818496704, "learning_rate": 0.00013207964014976299, "loss": 1.6806, "step": 11565 }, { "epoch": 0.41420309058678173, "grad_norm": 1.489131212234497, "learning_rate": 0.00013206865395003816, "loss": 1.2136, "step": 11566 }, { "epoch": 0.41423890271635005, "grad_norm": 1.3555222749710083, "learning_rate": 0.00013205766731887094, "loss": 1.4043, "step": 11567 }, { "epoch": 0.4142747148459183, "grad_norm": 2.4561402797698975, "learning_rate": 0.00013204668025640915, "loss": 1.5604, "step": 11568 }, { "epoch": 0.4143105269754866, "grad_norm": 1.891719102859497, "learning_rate": 0.00013203569276280062, "loss": 1.7261, "step": 11569 }, { "epoch": 0.41434633910505486, "grad_norm": 1.5649393796920776, "learning_rate": 0.00013202470483819316, "loss": 1.3593, "step": 11570 }, { "epoch": 0.4143821512346232, "grad_norm": 1.5750397443771362, "learning_rate": 0.00013201371648273463, "loss": 1.5991, "step": 11571 }, { "epoch": 0.41441796336419146, "grad_norm": 2.288243055343628, "learning_rate": 0.00013200272769657283, "loss": 1.5515, "step": 11572 }, { "epoch": 0.4144537754937597, "grad_norm": 1.4258782863616943, "learning_rate": 0.00013199173847985559, "loss": 1.3312, "step": 11573 }, { "epoch": 0.41448958762332805, "grad_norm": 1.5753511190414429, "learning_rate": 0.0001319807488327308, "loss": 1.4183, "step": 11574 }, { "epoch": 0.4145253997528963, "grad_norm": 1.7554752826690674, "learning_rate": 0.00013196975875534624, "loss": 1.6035, "step": 11575 }, { "epoch": 0.4145612118824646, "grad_norm": 2.1951706409454346, "learning_rate": 0.00013195876824784988, "loss": 1.4181, "step": 11576 }, { "epoch": 0.41459702401203286, "grad_norm": 1.4915632009506226, "learning_rate": 0.00013194777731038946, "loss": 1.5221, "step": 11577 }, { "epoch": 0.4146328361416012, "grad_norm": 1.97896409034729, "learning_rate": 0.00013193678594311295, "loss": 1.6528, "step": 11578 }, { "epoch": 0.41466864827116945, "grad_norm": 1.9763433933258057, "learning_rate": 0.00013192579414616815, "loss": 1.324, "step": 11579 }, { "epoch": 0.4147044604007377, "grad_norm": 1.8413565158843994, "learning_rate": 0.000131914801919703, "loss": 1.4303, "step": 11580 }, { "epoch": 0.41474027253030604, "grad_norm": 1.5440016984939575, "learning_rate": 0.0001319038092638653, "loss": 1.5037, "step": 11581 }, { "epoch": 0.4147760846598743, "grad_norm": 1.447457194328308, "learning_rate": 0.00013189281617880308, "loss": 1.3649, "step": 11582 }, { "epoch": 0.4148118967894426, "grad_norm": 1.756569743156433, "learning_rate": 0.0001318818226646641, "loss": 1.4381, "step": 11583 }, { "epoch": 0.41484770891901085, "grad_norm": 2.2443044185638428, "learning_rate": 0.00013187082872159636, "loss": 1.5342, "step": 11584 }, { "epoch": 0.4148835210485792, "grad_norm": 1.3705558776855469, "learning_rate": 0.0001318598343497477, "loss": 1.2355, "step": 11585 }, { "epoch": 0.41491933317814744, "grad_norm": 2.452343463897705, "learning_rate": 0.00013184883954926607, "loss": 1.2603, "step": 11586 }, { "epoch": 0.4149551453077157, "grad_norm": 1.5595102310180664, "learning_rate": 0.0001318378443202994, "loss": 1.6226, "step": 11587 }, { "epoch": 0.414990957437284, "grad_norm": 2.130498170852661, "learning_rate": 0.00013182684866299557, "loss": 1.63, "step": 11588 }, { "epoch": 0.4150267695668523, "grad_norm": 1.8103166818618774, "learning_rate": 0.00013181585257750257, "loss": 1.4003, "step": 11589 }, { "epoch": 0.4150625816964206, "grad_norm": 1.686850905418396, "learning_rate": 0.0001318048560639683, "loss": 1.3748, "step": 11590 }, { "epoch": 0.41509839382598884, "grad_norm": 1.4466736316680908, "learning_rate": 0.00013179385912254072, "loss": 1.5537, "step": 11591 }, { "epoch": 0.41513420595555717, "grad_norm": 1.7150828838348389, "learning_rate": 0.00013178286175336777, "loss": 1.1753, "step": 11592 }, { "epoch": 0.41517001808512544, "grad_norm": 1.8393244743347168, "learning_rate": 0.00013177186395659743, "loss": 1.7963, "step": 11593 }, { "epoch": 0.4152058302146937, "grad_norm": 1.8172906637191772, "learning_rate": 0.00013176086573237766, "loss": 1.5713, "step": 11594 }, { "epoch": 0.415241642344262, "grad_norm": 1.7263849973678589, "learning_rate": 0.0001317498670808564, "loss": 1.6143, "step": 11595 }, { "epoch": 0.4152774544738303, "grad_norm": 1.7366745471954346, "learning_rate": 0.0001317388680021816, "loss": 1.6609, "step": 11596 }, { "epoch": 0.41531326660339857, "grad_norm": 1.5991458892822266, "learning_rate": 0.00013172786849650133, "loss": 1.6754, "step": 11597 }, { "epoch": 0.41534907873296684, "grad_norm": 1.716823697090149, "learning_rate": 0.00013171686856396344, "loss": 1.608, "step": 11598 }, { "epoch": 0.41538489086253516, "grad_norm": 1.7387841939926147, "learning_rate": 0.00013170586820471605, "loss": 1.3947, "step": 11599 }, { "epoch": 0.41542070299210343, "grad_norm": 1.3563843965530396, "learning_rate": 0.00013169486741890706, "loss": 1.4469, "step": 11600 }, { "epoch": 0.4154565151216717, "grad_norm": 1.6906555891036987, "learning_rate": 0.0001316838662066845, "loss": 1.3223, "step": 11601 }, { "epoch": 0.41549232725123997, "grad_norm": 1.6035621166229248, "learning_rate": 0.00013167286456819646, "loss": 1.6543, "step": 11602 }, { "epoch": 0.4155281393808083, "grad_norm": 1.3639655113220215, "learning_rate": 0.00013166186250359086, "loss": 1.6864, "step": 11603 }, { "epoch": 0.41556395151037656, "grad_norm": 2.101548910140991, "learning_rate": 0.00013165086001301575, "loss": 1.5005, "step": 11604 }, { "epoch": 0.41559976363994483, "grad_norm": 1.2370082139968872, "learning_rate": 0.0001316398570966191, "loss": 1.4163, "step": 11605 }, { "epoch": 0.41563557576951315, "grad_norm": 1.4403676986694336, "learning_rate": 0.000131628853754549, "loss": 1.7009, "step": 11606 }, { "epoch": 0.4156713878990814, "grad_norm": 1.7065383195877075, "learning_rate": 0.00013161784998695349, "loss": 1.6103, "step": 11607 }, { "epoch": 0.4157072000286497, "grad_norm": 3.6323750019073486, "learning_rate": 0.00013160684579398057, "loss": 1.4806, "step": 11608 }, { "epoch": 0.41574301215821796, "grad_norm": 1.4103000164031982, "learning_rate": 0.00013159584117577831, "loss": 1.3592, "step": 11609 }, { "epoch": 0.4157788242877863, "grad_norm": 1.6261008977890015, "learning_rate": 0.0001315848361324948, "loss": 1.591, "step": 11610 }, { "epoch": 0.41581463641735456, "grad_norm": 2.0343966484069824, "learning_rate": 0.000131573830664278, "loss": 1.6907, "step": 11611 }, { "epoch": 0.4158504485469228, "grad_norm": 1.7296793460845947, "learning_rate": 0.0001315628247712761, "loss": 1.5331, "step": 11612 }, { "epoch": 0.41588626067649115, "grad_norm": 1.7771193981170654, "learning_rate": 0.0001315518184536371, "loss": 1.5747, "step": 11613 }, { "epoch": 0.4159220728060594, "grad_norm": 1.993929147720337, "learning_rate": 0.00013154081171150902, "loss": 1.7395, "step": 11614 }, { "epoch": 0.4159578849356277, "grad_norm": 1.6930261850357056, "learning_rate": 0.00013152980454504007, "loss": 1.7027, "step": 11615 }, { "epoch": 0.41599369706519596, "grad_norm": 2.1392862796783447, "learning_rate": 0.00013151879695437823, "loss": 2.0039, "step": 11616 }, { "epoch": 0.4160295091947643, "grad_norm": 1.4961529970169067, "learning_rate": 0.00013150778893967165, "loss": 1.8718, "step": 11617 }, { "epoch": 0.41606532132433255, "grad_norm": 2.090205430984497, "learning_rate": 0.0001314967805010684, "loss": 1.7351, "step": 11618 }, { "epoch": 0.4161011334539008, "grad_norm": 1.9120638370513916, "learning_rate": 0.0001314857716387166, "loss": 1.6389, "step": 11619 }, { "epoch": 0.41613694558346914, "grad_norm": 1.6917821168899536, "learning_rate": 0.00013147476235276438, "loss": 1.5383, "step": 11620 }, { "epoch": 0.4161727577130374, "grad_norm": 1.3282452821731567, "learning_rate": 0.00013146375264335978, "loss": 1.4817, "step": 11621 }, { "epoch": 0.4162085698426057, "grad_norm": 1.9482810497283936, "learning_rate": 0.00013145274251065103, "loss": 1.5597, "step": 11622 }, { "epoch": 0.41624438197217395, "grad_norm": 1.5540615320205688, "learning_rate": 0.00013144173195478616, "loss": 1.5281, "step": 11623 }, { "epoch": 0.4162801941017423, "grad_norm": 1.9348515272140503, "learning_rate": 0.0001314307209759134, "loss": 1.4764, "step": 11624 }, { "epoch": 0.41631600623131054, "grad_norm": 2.240257978439331, "learning_rate": 0.00013141970957418074, "loss": 1.5697, "step": 11625 }, { "epoch": 0.4163518183608788, "grad_norm": 1.4661803245544434, "learning_rate": 0.0001314086977497365, "loss": 1.2688, "step": 11626 }, { "epoch": 0.41638763049044714, "grad_norm": 1.7400275468826294, "learning_rate": 0.0001313976855027287, "loss": 1.4783, "step": 11627 }, { "epoch": 0.4164234426200154, "grad_norm": 1.930822730064392, "learning_rate": 0.00013138667283330556, "loss": 1.6808, "step": 11628 }, { "epoch": 0.4164592547495837, "grad_norm": 1.6102182865142822, "learning_rate": 0.00013137565974161524, "loss": 1.7887, "step": 11629 }, { "epoch": 0.41649506687915194, "grad_norm": 1.7597026824951172, "learning_rate": 0.00013136464622780583, "loss": 1.6061, "step": 11630 }, { "epoch": 0.41653087900872027, "grad_norm": 2.7631938457489014, "learning_rate": 0.00013135363229202564, "loss": 1.6641, "step": 11631 }, { "epoch": 0.41656669113828854, "grad_norm": 1.9850269556045532, "learning_rate": 0.0001313426179344227, "loss": 1.2111, "step": 11632 }, { "epoch": 0.4166025032678568, "grad_norm": 1.4527435302734375, "learning_rate": 0.0001313316031551453, "loss": 1.5459, "step": 11633 }, { "epoch": 0.41663831539742513, "grad_norm": 1.589457631111145, "learning_rate": 0.00013132058795434158, "loss": 1.6198, "step": 11634 }, { "epoch": 0.4166741275269934, "grad_norm": 1.4151225090026855, "learning_rate": 0.0001313095723321598, "loss": 1.4616, "step": 11635 }, { "epoch": 0.41670993965656167, "grad_norm": 1.7117297649383545, "learning_rate": 0.00013129855628874805, "loss": 1.6629, "step": 11636 }, { "epoch": 0.41674575178612994, "grad_norm": 2.363799571990967, "learning_rate": 0.0001312875398242546, "loss": 1.6905, "step": 11637 }, { "epoch": 0.41678156391569826, "grad_norm": 1.919259786605835, "learning_rate": 0.0001312765229388277, "loss": 1.4872, "step": 11638 }, { "epoch": 0.41681737604526653, "grad_norm": 2.0056421756744385, "learning_rate": 0.00013126550563261551, "loss": 1.5765, "step": 11639 }, { "epoch": 0.4168531881748348, "grad_norm": 1.4317076206207275, "learning_rate": 0.00013125448790576627, "loss": 1.3747, "step": 11640 }, { "epoch": 0.4168890003044031, "grad_norm": 1.9106783866882324, "learning_rate": 0.00013124346975842822, "loss": 1.3107, "step": 11641 }, { "epoch": 0.4169248124339714, "grad_norm": 3.9579901695251465, "learning_rate": 0.00013123245119074956, "loss": 1.5716, "step": 11642 }, { "epoch": 0.41696062456353966, "grad_norm": 2.3893094062805176, "learning_rate": 0.00013122143220287854, "loss": 1.5343, "step": 11643 }, { "epoch": 0.41699643669310793, "grad_norm": 1.485538363456726, "learning_rate": 0.00013121041279496348, "loss": 1.5387, "step": 11644 }, { "epoch": 0.41703224882267625, "grad_norm": 1.8348642587661743, "learning_rate": 0.00013119939296715253, "loss": 1.653, "step": 11645 }, { "epoch": 0.4170680609522445, "grad_norm": 1.5645591020584106, "learning_rate": 0.00013118837271959403, "loss": 1.6355, "step": 11646 }, { "epoch": 0.4171038730818128, "grad_norm": 1.9574569463729858, "learning_rate": 0.0001311773520524362, "loss": 1.6933, "step": 11647 }, { "epoch": 0.4171396852113811, "grad_norm": 2.693382740020752, "learning_rate": 0.00013116633096582728, "loss": 1.2307, "step": 11648 }, { "epoch": 0.4171754973409494, "grad_norm": 2.2191860675811768, "learning_rate": 0.0001311553094599156, "loss": 1.5716, "step": 11649 }, { "epoch": 0.41721130947051766, "grad_norm": 1.8328752517700195, "learning_rate": 0.00013114428753484942, "loss": 1.4303, "step": 11650 }, { "epoch": 0.4172471216000859, "grad_norm": 1.64580500125885, "learning_rate": 0.00013113326519077702, "loss": 1.8983, "step": 11651 }, { "epoch": 0.41728293372965425, "grad_norm": 1.5777573585510254, "learning_rate": 0.0001311222424278467, "loss": 1.3932, "step": 11652 }, { "epoch": 0.4173187458592225, "grad_norm": 1.5518149137496948, "learning_rate": 0.00013111121924620672, "loss": 1.3117, "step": 11653 }, { "epoch": 0.4173545579887908, "grad_norm": 1.7711327075958252, "learning_rate": 0.00013110019564600546, "loss": 1.4527, "step": 11654 }, { "epoch": 0.4173903701183591, "grad_norm": 1.4793881177902222, "learning_rate": 0.00013108917162739115, "loss": 1.7026, "step": 11655 }, { "epoch": 0.4174261822479274, "grad_norm": 1.7926698923110962, "learning_rate": 0.00013107814719051216, "loss": 1.4584, "step": 11656 }, { "epoch": 0.41746199437749565, "grad_norm": 1.60378897190094, "learning_rate": 0.0001310671223355168, "loss": 1.6985, "step": 11657 }, { "epoch": 0.4174978065070639, "grad_norm": 1.862740397453308, "learning_rate": 0.00013105609706255336, "loss": 1.6307, "step": 11658 }, { "epoch": 0.41753361863663224, "grad_norm": 1.755902647972107, "learning_rate": 0.00013104507137177022, "loss": 1.6147, "step": 11659 }, { "epoch": 0.4175694307662005, "grad_norm": 1.6584196090698242, "learning_rate": 0.00013103404526331564, "loss": 1.4459, "step": 11660 }, { "epoch": 0.4176052428957688, "grad_norm": 1.5859105587005615, "learning_rate": 0.00013102301873733807, "loss": 1.5394, "step": 11661 }, { "epoch": 0.4176410550253371, "grad_norm": 1.9892284870147705, "learning_rate": 0.00013101199179398572, "loss": 1.3006, "step": 11662 }, { "epoch": 0.4176768671549054, "grad_norm": 1.620436191558838, "learning_rate": 0.0001310009644334071, "loss": 1.4348, "step": 11663 }, { "epoch": 0.41771267928447364, "grad_norm": 1.9087800979614258, "learning_rate": 0.00013098993665575047, "loss": 1.725, "step": 11664 }, { "epoch": 0.4177484914140419, "grad_norm": 1.4620959758758545, "learning_rate": 0.0001309789084611642, "loss": 1.4238, "step": 11665 }, { "epoch": 0.41778430354361024, "grad_norm": 1.6417380571365356, "learning_rate": 0.00013096787984979673, "loss": 1.9673, "step": 11666 }, { "epoch": 0.4178201156731785, "grad_norm": 1.8227124214172363, "learning_rate": 0.00013095685082179632, "loss": 1.5281, "step": 11667 }, { "epoch": 0.4178559278027468, "grad_norm": 1.9912152290344238, "learning_rate": 0.00013094582137731145, "loss": 1.3199, "step": 11668 }, { "epoch": 0.4178917399323151, "grad_norm": 1.9016451835632324, "learning_rate": 0.00013093479151649043, "loss": 1.5948, "step": 11669 }, { "epoch": 0.41792755206188337, "grad_norm": 1.4120560884475708, "learning_rate": 0.00013092376123948174, "loss": 1.3703, "step": 11670 }, { "epoch": 0.41796336419145164, "grad_norm": 1.8982212543487549, "learning_rate": 0.0001309127305464337, "loss": 1.149, "step": 11671 }, { "epoch": 0.4179991763210199, "grad_norm": 1.4889423847198486, "learning_rate": 0.00013090169943749476, "loss": 1.5313, "step": 11672 }, { "epoch": 0.41803498845058823, "grad_norm": 1.54822838306427, "learning_rate": 0.00013089066791281332, "loss": 1.4798, "step": 11673 }, { "epoch": 0.4180708005801565, "grad_norm": 2.2774507999420166, "learning_rate": 0.00013087963597253777, "loss": 1.3878, "step": 11674 }, { "epoch": 0.41810661270972477, "grad_norm": 1.52279794216156, "learning_rate": 0.00013086860361681657, "loss": 1.3788, "step": 11675 }, { "epoch": 0.4181424248392931, "grad_norm": 1.921139121055603, "learning_rate": 0.00013085757084579808, "loss": 1.6913, "step": 11676 }, { "epoch": 0.41817823696886136, "grad_norm": 1.6303210258483887, "learning_rate": 0.00013084653765963085, "loss": 1.636, "step": 11677 }, { "epoch": 0.41821404909842963, "grad_norm": 1.3981053829193115, "learning_rate": 0.0001308355040584632, "loss": 1.4341, "step": 11678 }, { "epoch": 0.4182498612279979, "grad_norm": 1.5886470079421997, "learning_rate": 0.0001308244700424436, "loss": 1.5647, "step": 11679 }, { "epoch": 0.4182856733575662, "grad_norm": 1.4517158269882202, "learning_rate": 0.00013081343561172055, "loss": 1.4405, "step": 11680 }, { "epoch": 0.4183214854871345, "grad_norm": 1.6235915422439575, "learning_rate": 0.00013080240076644245, "loss": 1.6848, "step": 11681 }, { "epoch": 0.41835729761670276, "grad_norm": 1.8105449676513672, "learning_rate": 0.0001307913655067578, "loss": 1.4255, "step": 11682 }, { "epoch": 0.4183931097462711, "grad_norm": 1.8867751359939575, "learning_rate": 0.00013078032983281505, "loss": 1.8674, "step": 11683 }, { "epoch": 0.41842892187583935, "grad_norm": 1.5423702001571655, "learning_rate": 0.00013076929374476265, "loss": 1.6115, "step": 11684 }, { "epoch": 0.4184647340054076, "grad_norm": 1.234898567199707, "learning_rate": 0.00013075825724274907, "loss": 1.3527, "step": 11685 }, { "epoch": 0.4185005461349759, "grad_norm": 1.7368626594543457, "learning_rate": 0.00013074722032692287, "loss": 1.4389, "step": 11686 }, { "epoch": 0.4185363582645442, "grad_norm": 2.3689353466033936, "learning_rate": 0.00013073618299743242, "loss": 1.4973, "step": 11687 }, { "epoch": 0.4185721703941125, "grad_norm": 1.5590380430221558, "learning_rate": 0.00013072514525442632, "loss": 1.482, "step": 11688 }, { "epoch": 0.41860798252368076, "grad_norm": 1.8684630393981934, "learning_rate": 0.000130714107098053, "loss": 1.4597, "step": 11689 }, { "epoch": 0.4186437946532491, "grad_norm": 1.368911623954773, "learning_rate": 0.000130703068528461, "loss": 1.4001, "step": 11690 }, { "epoch": 0.41867960678281735, "grad_norm": 2.009352922439575, "learning_rate": 0.00013069202954579882, "loss": 1.6073, "step": 11691 }, { "epoch": 0.4187154189123856, "grad_norm": 1.8053749799728394, "learning_rate": 0.00013068099015021498, "loss": 1.2745, "step": 11692 }, { "epoch": 0.4187512310419539, "grad_norm": 2.223372459411621, "learning_rate": 0.00013066995034185798, "loss": 1.7715, "step": 11693 }, { "epoch": 0.4187870431715222, "grad_norm": 1.833401083946228, "learning_rate": 0.00013065891012087634, "loss": 1.7072, "step": 11694 }, { "epoch": 0.4188228553010905, "grad_norm": 1.3824347257614136, "learning_rate": 0.00013064786948741863, "loss": 1.4636, "step": 11695 }, { "epoch": 0.41885866743065875, "grad_norm": 1.4842058420181274, "learning_rate": 0.00013063682844163338, "loss": 1.2638, "step": 11696 }, { "epoch": 0.4188944795602271, "grad_norm": 1.8669413328170776, "learning_rate": 0.00013062578698366909, "loss": 1.5242, "step": 11697 }, { "epoch": 0.41893029168979534, "grad_norm": 1.6303125619888306, "learning_rate": 0.00013061474511367435, "loss": 1.6688, "step": 11698 }, { "epoch": 0.4189661038193636, "grad_norm": 1.7029786109924316, "learning_rate": 0.00013060370283179772, "loss": 1.7938, "step": 11699 }, { "epoch": 0.4190019159489319, "grad_norm": 2.1118316650390625, "learning_rate": 0.00013059266013818775, "loss": 1.5609, "step": 11700 }, { "epoch": 0.4190377280785002, "grad_norm": 1.7894207239151, "learning_rate": 0.000130581617032993, "loss": 1.6328, "step": 11701 }, { "epoch": 0.4190735402080685, "grad_norm": 1.6137210130691528, "learning_rate": 0.00013057057351636205, "loss": 1.1927, "step": 11702 }, { "epoch": 0.41910935233763674, "grad_norm": 1.628922700881958, "learning_rate": 0.00013055952958844345, "loss": 1.7027, "step": 11703 }, { "epoch": 0.41914516446720507, "grad_norm": 1.7288146018981934, "learning_rate": 0.00013054848524938576, "loss": 1.5557, "step": 11704 }, { "epoch": 0.41918097659677334, "grad_norm": 1.3177454471588135, "learning_rate": 0.00013053744049933765, "loss": 1.5625, "step": 11705 }, { "epoch": 0.4192167887263416, "grad_norm": 1.693861722946167, "learning_rate": 0.00013052639533844766, "loss": 1.3449, "step": 11706 }, { "epoch": 0.4192526008559099, "grad_norm": 1.604946494102478, "learning_rate": 0.0001305153497668644, "loss": 1.5387, "step": 11707 }, { "epoch": 0.4192884129854782, "grad_norm": 1.9413237571716309, "learning_rate": 0.00013050430378473647, "loss": 1.275, "step": 11708 }, { "epoch": 0.41932422511504647, "grad_norm": 1.5269191265106201, "learning_rate": 0.00013049325739221247, "loss": 1.6002, "step": 11709 }, { "epoch": 0.41936003724461474, "grad_norm": 1.6607357263565063, "learning_rate": 0.00013048221058944103, "loss": 1.5763, "step": 11710 }, { "epoch": 0.41939584937418306, "grad_norm": 1.8496578931808472, "learning_rate": 0.00013047116337657077, "loss": 1.734, "step": 11711 }, { "epoch": 0.41943166150375133, "grad_norm": 1.3404603004455566, "learning_rate": 0.00013046011575375033, "loss": 1.3846, "step": 11712 }, { "epoch": 0.4194674736333196, "grad_norm": 1.9869346618652344, "learning_rate": 0.00013044906772112828, "loss": 1.4686, "step": 11713 }, { "epoch": 0.41950328576288787, "grad_norm": 1.6581941843032837, "learning_rate": 0.00013043801927885334, "loss": 1.5109, "step": 11714 }, { "epoch": 0.4195390978924562, "grad_norm": 2.1327946186065674, "learning_rate": 0.00013042697042707407, "loss": 1.2431, "step": 11715 }, { "epoch": 0.41957491002202446, "grad_norm": 1.4978673458099365, "learning_rate": 0.0001304159211659392, "loss": 1.5151, "step": 11716 }, { "epoch": 0.41961072215159273, "grad_norm": 1.6439182758331299, "learning_rate": 0.00013040487149559735, "loss": 1.4472, "step": 11717 }, { "epoch": 0.41964653428116105, "grad_norm": 1.6311148405075073, "learning_rate": 0.00013039382141619713, "loss": 1.6517, "step": 11718 }, { "epoch": 0.4196823464107293, "grad_norm": 1.6701090335845947, "learning_rate": 0.0001303827709278873, "loss": 1.8037, "step": 11719 }, { "epoch": 0.4197181585402976, "grad_norm": 1.9653955698013306, "learning_rate": 0.0001303717200308164, "loss": 1.3443, "step": 11720 }, { "epoch": 0.41975397066986586, "grad_norm": 1.8693047761917114, "learning_rate": 0.00013036066872513326, "loss": 1.6978, "step": 11721 }, { "epoch": 0.4197897827994342, "grad_norm": 1.5519013404846191, "learning_rate": 0.00013034961701098645, "loss": 1.355, "step": 11722 }, { "epoch": 0.41982559492900245, "grad_norm": 2.0810554027557373, "learning_rate": 0.0001303385648885247, "loss": 1.8034, "step": 11723 }, { "epoch": 0.4198614070585707, "grad_norm": 2.0059664249420166, "learning_rate": 0.00013032751235789668, "loss": 1.53, "step": 11724 }, { "epoch": 0.41989721918813905, "grad_norm": 1.6518661975860596, "learning_rate": 0.00013031645941925115, "loss": 1.591, "step": 11725 }, { "epoch": 0.4199330313177073, "grad_norm": 1.6742372512817383, "learning_rate": 0.00013030540607273674, "loss": 1.6334, "step": 11726 }, { "epoch": 0.4199688434472756, "grad_norm": 1.625395655632019, "learning_rate": 0.00013029435231850215, "loss": 1.4972, "step": 11727 }, { "epoch": 0.42000465557684385, "grad_norm": 1.3360016345977783, "learning_rate": 0.00013028329815669616, "loss": 1.2917, "step": 11728 }, { "epoch": 0.4200404677064122, "grad_norm": 1.6598440408706665, "learning_rate": 0.00013027224358746743, "loss": 1.4068, "step": 11729 }, { "epoch": 0.42007627983598045, "grad_norm": 1.5892927646636963, "learning_rate": 0.00013026118861096472, "loss": 1.239, "step": 11730 }, { "epoch": 0.4201120919655487, "grad_norm": 1.4301259517669678, "learning_rate": 0.00013025013322733674, "loss": 1.4511, "step": 11731 }, { "epoch": 0.42014790409511704, "grad_norm": 1.4316529035568237, "learning_rate": 0.00013023907743673228, "loss": 1.5084, "step": 11732 }, { "epoch": 0.4201837162246853, "grad_norm": 1.458953857421875, "learning_rate": 0.0001302280212393, "loss": 1.3913, "step": 11733 }, { "epoch": 0.4202195283542536, "grad_norm": 2.121302366256714, "learning_rate": 0.0001302169646351887, "loss": 1.6365, "step": 11734 }, { "epoch": 0.42025534048382185, "grad_norm": 1.6328710317611694, "learning_rate": 0.00013020590762454713, "loss": 1.5309, "step": 11735 }, { "epoch": 0.4202911526133902, "grad_norm": 1.7382535934448242, "learning_rate": 0.00013019485020752402, "loss": 1.28, "step": 11736 }, { "epoch": 0.42032696474295844, "grad_norm": 1.6773878335952759, "learning_rate": 0.00013018379238426814, "loss": 1.7261, "step": 11737 }, { "epoch": 0.4203627768725267, "grad_norm": 2.123692750930786, "learning_rate": 0.0001301727341549283, "loss": 1.6939, "step": 11738 }, { "epoch": 0.42039858900209504, "grad_norm": 1.591450810432434, "learning_rate": 0.0001301616755196532, "loss": 1.6366, "step": 11739 }, { "epoch": 0.4204344011316633, "grad_norm": 1.7275581359863281, "learning_rate": 0.0001301506164785917, "loss": 1.6026, "step": 11740 }, { "epoch": 0.4204702132612316, "grad_norm": 1.6297035217285156, "learning_rate": 0.00013013955703189252, "loss": 1.683, "step": 11741 }, { "epoch": 0.42050602539079984, "grad_norm": 1.9007762670516968, "learning_rate": 0.00013012849717970447, "loss": 1.3047, "step": 11742 }, { "epoch": 0.42054183752036817, "grad_norm": 1.8373106718063354, "learning_rate": 0.00013011743692217638, "loss": 1.6415, "step": 11743 }, { "epoch": 0.42057764964993644, "grad_norm": 1.351611852645874, "learning_rate": 0.000130106376259457, "loss": 1.3333, "step": 11744 }, { "epoch": 0.4206134617795047, "grad_norm": 1.4901500940322876, "learning_rate": 0.0001300953151916952, "loss": 1.6745, "step": 11745 }, { "epoch": 0.42064927390907303, "grad_norm": 1.6721500158309937, "learning_rate": 0.0001300842537190397, "loss": 1.4681, "step": 11746 }, { "epoch": 0.4206850860386413, "grad_norm": 2.408515453338623, "learning_rate": 0.00013007319184163944, "loss": 1.4235, "step": 11747 }, { "epoch": 0.42072089816820957, "grad_norm": 2.0116817951202393, "learning_rate": 0.00013006212955964311, "loss": 1.586, "step": 11748 }, { "epoch": 0.42075671029777784, "grad_norm": 1.4370110034942627, "learning_rate": 0.00013005106687319966, "loss": 1.8267, "step": 11749 }, { "epoch": 0.42079252242734616, "grad_norm": 1.985016942024231, "learning_rate": 0.00013004000378245782, "loss": 1.8785, "step": 11750 }, { "epoch": 0.42082833455691443, "grad_norm": 1.489750623703003, "learning_rate": 0.00013002894028756653, "loss": 1.3115, "step": 11751 }, { "epoch": 0.4208641466864827, "grad_norm": 1.9699902534484863, "learning_rate": 0.0001300178763886745, "loss": 1.4464, "step": 11752 }, { "epoch": 0.420899958816051, "grad_norm": 1.4457578659057617, "learning_rate": 0.00013000681208593073, "loss": 1.6236, "step": 11753 }, { "epoch": 0.4209357709456193, "grad_norm": 1.5516526699066162, "learning_rate": 0.000129995747379484, "loss": 1.5156, "step": 11754 }, { "epoch": 0.42097158307518756, "grad_norm": 1.7440998554229736, "learning_rate": 0.00012998468226948316, "loss": 1.7088, "step": 11755 }, { "epoch": 0.42100739520475583, "grad_norm": 1.6341153383255005, "learning_rate": 0.00012997361675607714, "loss": 1.3348, "step": 11756 }, { "epoch": 0.42104320733432415, "grad_norm": 2.47872257232666, "learning_rate": 0.0001299625508394147, "loss": 1.6474, "step": 11757 }, { "epoch": 0.4210790194638924, "grad_norm": 1.4526902437210083, "learning_rate": 0.00012995148451964487, "loss": 1.5869, "step": 11758 }, { "epoch": 0.4211148315934607, "grad_norm": 1.5664465427398682, "learning_rate": 0.00012994041779691639, "loss": 1.6571, "step": 11759 }, { "epoch": 0.421150643723029, "grad_norm": 1.4599276781082153, "learning_rate": 0.00012992935067137823, "loss": 1.357, "step": 11760 }, { "epoch": 0.4211864558525973, "grad_norm": 1.712045669555664, "learning_rate": 0.00012991828314317923, "loss": 1.531, "step": 11761 }, { "epoch": 0.42122226798216555, "grad_norm": 2.1027767658233643, "learning_rate": 0.00012990721521246839, "loss": 1.6629, "step": 11762 }, { "epoch": 0.4212580801117338, "grad_norm": 1.7195019721984863, "learning_rate": 0.00012989614687939453, "loss": 1.5359, "step": 11763 }, { "epoch": 0.42129389224130215, "grad_norm": 1.4466419219970703, "learning_rate": 0.00012988507814410652, "loss": 1.6942, "step": 11764 }, { "epoch": 0.4213297043708704, "grad_norm": 1.2719695568084717, "learning_rate": 0.0001298740090067534, "loss": 1.5447, "step": 11765 }, { "epoch": 0.4213655165004387, "grad_norm": 2.5164668560028076, "learning_rate": 0.000129862939467484, "loss": 1.5569, "step": 11766 }, { "epoch": 0.421401328630007, "grad_norm": 1.691938877105713, "learning_rate": 0.00012985186952644724, "loss": 1.4605, "step": 11767 }, { "epoch": 0.4214371407595753, "grad_norm": 1.8045603036880493, "learning_rate": 0.0001298407991837921, "loss": 1.3567, "step": 11768 }, { "epoch": 0.42147295288914355, "grad_norm": 1.5395952463150024, "learning_rate": 0.0001298297284396675, "loss": 1.6325, "step": 11769 }, { "epoch": 0.4215087650187118, "grad_norm": 1.8432800769805908, "learning_rate": 0.0001298186572942224, "loss": 1.4202, "step": 11770 }, { "epoch": 0.42154457714828014, "grad_norm": 2.240885019302368, "learning_rate": 0.00012980758574760573, "loss": 1.9024, "step": 11771 }, { "epoch": 0.4215803892778484, "grad_norm": 2.1993038654327393, "learning_rate": 0.00012979651379996642, "loss": 1.5855, "step": 11772 }, { "epoch": 0.4216162014074167, "grad_norm": 1.9593286514282227, "learning_rate": 0.00012978544145145343, "loss": 1.5088, "step": 11773 }, { "epoch": 0.421652013536985, "grad_norm": 1.6190402507781982, "learning_rate": 0.0001297743687022158, "loss": 1.6438, "step": 11774 }, { "epoch": 0.4216878256665533, "grad_norm": 1.6543118953704834, "learning_rate": 0.0001297632955524024, "loss": 1.2093, "step": 11775 }, { "epoch": 0.42172363779612154, "grad_norm": 1.6316370964050293, "learning_rate": 0.00012975222200216227, "loss": 1.3618, "step": 11776 }, { "epoch": 0.4217594499256898, "grad_norm": 1.841871738433838, "learning_rate": 0.00012974114805164438, "loss": 1.6844, "step": 11777 }, { "epoch": 0.42179526205525814, "grad_norm": 1.207383632659912, "learning_rate": 0.0001297300737009977, "loss": 1.4037, "step": 11778 }, { "epoch": 0.4218310741848264, "grad_norm": 1.9168089628219604, "learning_rate": 0.00012971899895037123, "loss": 1.3065, "step": 11779 }, { "epoch": 0.4218668863143947, "grad_norm": 1.7665578126907349, "learning_rate": 0.00012970792379991396, "loss": 1.4799, "step": 11780 }, { "epoch": 0.421902698443963, "grad_norm": 1.5356736183166504, "learning_rate": 0.00012969684824977492, "loss": 1.5496, "step": 11781 }, { "epoch": 0.42193851057353127, "grad_norm": 1.6036309003829956, "learning_rate": 0.00012968577230010304, "loss": 1.6618, "step": 11782 }, { "epoch": 0.42197432270309954, "grad_norm": 1.2157410383224487, "learning_rate": 0.0001296746959510474, "loss": 1.4071, "step": 11783 }, { "epoch": 0.4220101348326678, "grad_norm": 1.6793562173843384, "learning_rate": 0.00012966361920275702, "loss": 1.2486, "step": 11784 }, { "epoch": 0.42204594696223613, "grad_norm": 1.6444344520568848, "learning_rate": 0.0001296525420553809, "loss": 1.5359, "step": 11785 }, { "epoch": 0.4220817590918044, "grad_norm": 2.4854736328125, "learning_rate": 0.00012964146450906807, "loss": 1.4089, "step": 11786 }, { "epoch": 0.42211757122137267, "grad_norm": 2.2101008892059326, "learning_rate": 0.0001296303865639676, "loss": 1.6471, "step": 11787 }, { "epoch": 0.42215338335094094, "grad_norm": 1.3930305242538452, "learning_rate": 0.00012961930822022848, "loss": 1.5859, "step": 11788 }, { "epoch": 0.42218919548050926, "grad_norm": 1.6884602308273315, "learning_rate": 0.00012960822947799978, "loss": 1.4459, "step": 11789 }, { "epoch": 0.42222500761007753, "grad_norm": 1.2813154458999634, "learning_rate": 0.0001295971503374305, "loss": 1.5418, "step": 11790 }, { "epoch": 0.4222608197396458, "grad_norm": 1.8060952425003052, "learning_rate": 0.0001295860707986698, "loss": 1.5921, "step": 11791 }, { "epoch": 0.4222966318692141, "grad_norm": 2.59269642829895, "learning_rate": 0.00012957499086186665, "loss": 1.3835, "step": 11792 }, { "epoch": 0.4223324439987824, "grad_norm": 1.729286789894104, "learning_rate": 0.00012956391052717017, "loss": 1.4255, "step": 11793 }, { "epoch": 0.42236825612835066, "grad_norm": 1.4950153827667236, "learning_rate": 0.00012955282979472938, "loss": 1.1579, "step": 11794 }, { "epoch": 0.42240406825791893, "grad_norm": 1.7770169973373413, "learning_rate": 0.00012954174866469336, "loss": 1.7252, "step": 11795 }, { "epoch": 0.42243988038748725, "grad_norm": 1.7633696794509888, "learning_rate": 0.0001295306671372113, "loss": 1.5152, "step": 11796 }, { "epoch": 0.4224756925170555, "grad_norm": 2.1306397914886475, "learning_rate": 0.00012951958521243215, "loss": 1.6468, "step": 11797 }, { "epoch": 0.4225115046466238, "grad_norm": 1.2364559173583984, "learning_rate": 0.00012950850289050508, "loss": 1.5101, "step": 11798 }, { "epoch": 0.4225473167761921, "grad_norm": 1.563754677772522, "learning_rate": 0.00012949742017157915, "loss": 1.3717, "step": 11799 }, { "epoch": 0.4225831289057604, "grad_norm": 1.5410443544387817, "learning_rate": 0.0001294863370558035, "loss": 1.3353, "step": 11800 }, { "epoch": 0.42261894103532865, "grad_norm": 1.564864993095398, "learning_rate": 0.0001294752535433272, "loss": 1.3936, "step": 11801 }, { "epoch": 0.4226547531648969, "grad_norm": 1.5698978900909424, "learning_rate": 0.0001294641696342994, "loss": 1.5412, "step": 11802 }, { "epoch": 0.42269056529446525, "grad_norm": 2.2333059310913086, "learning_rate": 0.00012945308532886918, "loss": 1.3542, "step": 11803 }, { "epoch": 0.4227263774240335, "grad_norm": 1.559870719909668, "learning_rate": 0.0001294420006271857, "loss": 1.3988, "step": 11804 }, { "epoch": 0.4227621895536018, "grad_norm": 1.3145016431808472, "learning_rate": 0.00012943091552939807, "loss": 1.2073, "step": 11805 }, { "epoch": 0.4227980016831701, "grad_norm": 1.6928826570510864, "learning_rate": 0.00012941983003565544, "loss": 1.376, "step": 11806 }, { "epoch": 0.4228338138127384, "grad_norm": 1.3938753604888916, "learning_rate": 0.00012940874414610698, "loss": 1.5549, "step": 11807 }, { "epoch": 0.42286962594230665, "grad_norm": 1.5950511693954468, "learning_rate": 0.00012939765786090178, "loss": 1.3254, "step": 11808 }, { "epoch": 0.4229054380718749, "grad_norm": 1.5573126077651978, "learning_rate": 0.000129386571180189, "loss": 1.5849, "step": 11809 }, { "epoch": 0.42294125020144324, "grad_norm": 2.0934598445892334, "learning_rate": 0.00012937548410411778, "loss": 1.4638, "step": 11810 }, { "epoch": 0.4229770623310115, "grad_norm": 1.5477838516235352, "learning_rate": 0.0001293643966328374, "loss": 1.4116, "step": 11811 }, { "epoch": 0.4230128744605798, "grad_norm": 1.7207345962524414, "learning_rate": 0.00012935330876649687, "loss": 1.5597, "step": 11812 }, { "epoch": 0.4230486865901481, "grad_norm": 1.701752781867981, "learning_rate": 0.00012934222050524547, "loss": 1.6269, "step": 11813 }, { "epoch": 0.4230844987197164, "grad_norm": 1.5868911743164062, "learning_rate": 0.00012933113184923232, "loss": 1.4728, "step": 11814 }, { "epoch": 0.42312031084928464, "grad_norm": 1.414086103439331, "learning_rate": 0.00012932004279860663, "loss": 1.3683, "step": 11815 }, { "epoch": 0.4231561229788529, "grad_norm": 2.1255552768707275, "learning_rate": 0.0001293089533535176, "loss": 1.8208, "step": 11816 }, { "epoch": 0.42319193510842124, "grad_norm": 1.8227784633636475, "learning_rate": 0.00012929786351411439, "loss": 1.3282, "step": 11817 }, { "epoch": 0.4232277472379895, "grad_norm": 1.7852251529693604, "learning_rate": 0.00012928677328054623, "loss": 1.5736, "step": 11818 }, { "epoch": 0.4232635593675578, "grad_norm": 2.8936076164245605, "learning_rate": 0.00012927568265296227, "loss": 1.5581, "step": 11819 }, { "epoch": 0.4232993714971261, "grad_norm": 1.452358365058899, "learning_rate": 0.00012926459163151182, "loss": 1.6559, "step": 11820 }, { "epoch": 0.42333518362669437, "grad_norm": 1.7268511056900024, "learning_rate": 0.000129253500216344, "loss": 1.604, "step": 11821 }, { "epoch": 0.42337099575626264, "grad_norm": 1.3520252704620361, "learning_rate": 0.00012924240840760811, "loss": 1.4204, "step": 11822 }, { "epoch": 0.4234068078858309, "grad_norm": 1.4777060747146606, "learning_rate": 0.0001292313162054533, "loss": 1.3741, "step": 11823 }, { "epoch": 0.42344262001539923, "grad_norm": 1.7785598039627075, "learning_rate": 0.00012922022361002886, "loss": 1.6714, "step": 11824 }, { "epoch": 0.4234784321449675, "grad_norm": 1.92035710811615, "learning_rate": 0.00012920913062148398, "loss": 1.8324, "step": 11825 }, { "epoch": 0.42351424427453577, "grad_norm": 1.8269778490066528, "learning_rate": 0.00012919803723996794, "loss": 1.7714, "step": 11826 }, { "epoch": 0.4235500564041041, "grad_norm": 1.4856898784637451, "learning_rate": 0.00012918694346562997, "loss": 1.7747, "step": 11827 }, { "epoch": 0.42358586853367236, "grad_norm": 1.4505622386932373, "learning_rate": 0.0001291758492986193, "loss": 1.6209, "step": 11828 }, { "epoch": 0.42362168066324063, "grad_norm": 1.4111675024032593, "learning_rate": 0.00012916475473908525, "loss": 1.2532, "step": 11829 }, { "epoch": 0.4236574927928089, "grad_norm": 1.8499622344970703, "learning_rate": 0.00012915365978717703, "loss": 1.4083, "step": 11830 }, { "epoch": 0.4236933049223772, "grad_norm": 1.4217849969863892, "learning_rate": 0.0001291425644430439, "loss": 1.5601, "step": 11831 }, { "epoch": 0.4237291170519455, "grad_norm": 1.7619093656539917, "learning_rate": 0.0001291314687068352, "loss": 1.3126, "step": 11832 }, { "epoch": 0.42376492918151376, "grad_norm": 1.2267673015594482, "learning_rate": 0.00012912037257870016, "loss": 1.5579, "step": 11833 }, { "epoch": 0.4238007413110821, "grad_norm": 1.6975821256637573, "learning_rate": 0.00012910927605878803, "loss": 1.4415, "step": 11834 }, { "epoch": 0.42383655344065035, "grad_norm": 1.650166392326355, "learning_rate": 0.0001290981791472482, "loss": 1.8445, "step": 11835 }, { "epoch": 0.4238723655702186, "grad_norm": 1.423077940940857, "learning_rate": 0.00012908708184422983, "loss": 1.3797, "step": 11836 }, { "epoch": 0.4239081776997869, "grad_norm": 1.5820120573043823, "learning_rate": 0.0001290759841498823, "loss": 1.6298, "step": 11837 }, { "epoch": 0.4239439898293552, "grad_norm": 1.6595290899276733, "learning_rate": 0.00012906488606435497, "loss": 1.5093, "step": 11838 }, { "epoch": 0.4239798019589235, "grad_norm": 2.5189099311828613, "learning_rate": 0.00012905378758779702, "loss": 1.3822, "step": 11839 }, { "epoch": 0.42401561408849175, "grad_norm": 1.7056360244750977, "learning_rate": 0.00012904268872035787, "loss": 1.253, "step": 11840 }, { "epoch": 0.4240514262180601, "grad_norm": 1.8914939165115356, "learning_rate": 0.00012903158946218682, "loss": 1.4382, "step": 11841 }, { "epoch": 0.42408723834762835, "grad_norm": 1.5297006368637085, "learning_rate": 0.0001290204898134332, "loss": 1.5213, "step": 11842 }, { "epoch": 0.4241230504771966, "grad_norm": 1.7533084154129028, "learning_rate": 0.00012900938977424627, "loss": 1.4682, "step": 11843 }, { "epoch": 0.4241588626067649, "grad_norm": 1.830748200416565, "learning_rate": 0.00012899828934477545, "loss": 1.6938, "step": 11844 }, { "epoch": 0.4241946747363332, "grad_norm": 1.4886054992675781, "learning_rate": 0.00012898718852517003, "loss": 1.4325, "step": 11845 }, { "epoch": 0.4242304868659015, "grad_norm": 1.547685146331787, "learning_rate": 0.0001289760873155794, "loss": 1.6207, "step": 11846 }, { "epoch": 0.42426629899546975, "grad_norm": 1.416277289390564, "learning_rate": 0.00012896498571615287, "loss": 1.5239, "step": 11847 }, { "epoch": 0.4243021111250381, "grad_norm": 1.5753027200698853, "learning_rate": 0.00012895388372703985, "loss": 1.6592, "step": 11848 }, { "epoch": 0.42433792325460634, "grad_norm": 1.7635771036148071, "learning_rate": 0.00012894278134838963, "loss": 1.6179, "step": 11849 }, { "epoch": 0.4243737353841746, "grad_norm": 1.644930362701416, "learning_rate": 0.00012893167858035168, "loss": 1.5943, "step": 11850 }, { "epoch": 0.4244095475137429, "grad_norm": 1.333198070526123, "learning_rate": 0.00012892057542307527, "loss": 1.695, "step": 11851 }, { "epoch": 0.4244453596433112, "grad_norm": 2.0017025470733643, "learning_rate": 0.00012890947187670982, "loss": 1.617, "step": 11852 }, { "epoch": 0.4244811717728795, "grad_norm": 1.3186886310577393, "learning_rate": 0.00012889836794140473, "loss": 1.634, "step": 11853 }, { "epoch": 0.42451698390244774, "grad_norm": 1.535506248474121, "learning_rate": 0.00012888726361730935, "loss": 1.5335, "step": 11854 }, { "epoch": 0.42455279603201607, "grad_norm": 1.9215971231460571, "learning_rate": 0.00012887615890457314, "loss": 1.5718, "step": 11855 }, { "epoch": 0.42458860816158434, "grad_norm": 1.4248054027557373, "learning_rate": 0.00012886505380334544, "loss": 1.4283, "step": 11856 }, { "epoch": 0.4246244202911526, "grad_norm": 1.514243483543396, "learning_rate": 0.00012885394831377569, "loss": 1.6051, "step": 11857 }, { "epoch": 0.4246602324207209, "grad_norm": 1.4963092803955078, "learning_rate": 0.00012884284243601325, "loss": 1.4867, "step": 11858 }, { "epoch": 0.4246960445502892, "grad_norm": 1.4643319845199585, "learning_rate": 0.0001288317361702076, "loss": 1.4199, "step": 11859 }, { "epoch": 0.42473185667985747, "grad_norm": 1.4797992706298828, "learning_rate": 0.00012882062951650815, "loss": 1.4582, "step": 11860 }, { "epoch": 0.42476766880942574, "grad_norm": 2.1441917419433594, "learning_rate": 0.00012880952247506426, "loss": 1.4834, "step": 11861 }, { "epoch": 0.42480348093899406, "grad_norm": 1.7394976615905762, "learning_rate": 0.00012879841504602544, "loss": 1.747, "step": 11862 }, { "epoch": 0.42483929306856233, "grad_norm": 2.274009943008423, "learning_rate": 0.00012878730722954104, "loss": 1.596, "step": 11863 }, { "epoch": 0.4248751051981306, "grad_norm": 1.643452525138855, "learning_rate": 0.00012877619902576062, "loss": 1.6008, "step": 11864 }, { "epoch": 0.42491091732769887, "grad_norm": 1.7636380195617676, "learning_rate": 0.0001287650904348335, "loss": 1.037, "step": 11865 }, { "epoch": 0.4249467294572672, "grad_norm": 1.6334855556488037, "learning_rate": 0.00012875398145690924, "loss": 1.278, "step": 11866 }, { "epoch": 0.42498254158683546, "grad_norm": 1.7743029594421387, "learning_rate": 0.00012874287209213724, "loss": 1.373, "step": 11867 }, { "epoch": 0.42501835371640373, "grad_norm": 1.3626136779785156, "learning_rate": 0.00012873176234066698, "loss": 1.5498, "step": 11868 }, { "epoch": 0.42505416584597205, "grad_norm": 1.7889161109924316, "learning_rate": 0.0001287206522026479, "loss": 1.6937, "step": 11869 }, { "epoch": 0.4250899779755403, "grad_norm": 1.5051333904266357, "learning_rate": 0.0001287095416782295, "loss": 1.4984, "step": 11870 }, { "epoch": 0.4251257901051086, "grad_norm": 1.7558956146240234, "learning_rate": 0.00012869843076756125, "loss": 1.4792, "step": 11871 }, { "epoch": 0.42516160223467686, "grad_norm": 1.8319754600524902, "learning_rate": 0.0001286873194707926, "loss": 1.943, "step": 11872 }, { "epoch": 0.4251974143642452, "grad_norm": 1.6281830072402954, "learning_rate": 0.0001286762077880731, "loss": 1.3749, "step": 11873 }, { "epoch": 0.42523322649381345, "grad_norm": 1.5572781562805176, "learning_rate": 0.00012866509571955221, "loss": 1.7843, "step": 11874 }, { "epoch": 0.4252690386233817, "grad_norm": 1.4318057298660278, "learning_rate": 0.00012865398326537944, "loss": 1.423, "step": 11875 }, { "epoch": 0.42530485075295005, "grad_norm": 1.6866233348846436, "learning_rate": 0.0001286428704257043, "loss": 1.545, "step": 11876 }, { "epoch": 0.4253406628825183, "grad_norm": 1.7870521545410156, "learning_rate": 0.00012863175720067627, "loss": 1.7577, "step": 11877 }, { "epoch": 0.4253764750120866, "grad_norm": 1.4595621824264526, "learning_rate": 0.00012862064359044485, "loss": 1.5902, "step": 11878 }, { "epoch": 0.42541228714165485, "grad_norm": 1.636542797088623, "learning_rate": 0.00012860952959515962, "loss": 1.4817, "step": 11879 }, { "epoch": 0.4254480992712232, "grad_norm": 2.6225593090057373, "learning_rate": 0.00012859841521497008, "loss": 1.208, "step": 11880 }, { "epoch": 0.42548391140079145, "grad_norm": 1.9722484350204468, "learning_rate": 0.00012858730045002572, "loss": 1.7867, "step": 11881 }, { "epoch": 0.4255197235303597, "grad_norm": 1.486351490020752, "learning_rate": 0.00012857618530047615, "loss": 1.5268, "step": 11882 }, { "epoch": 0.42555553565992804, "grad_norm": 1.3822557926177979, "learning_rate": 0.0001285650697664708, "loss": 1.5033, "step": 11883 }, { "epoch": 0.4255913477894963, "grad_norm": 1.99789559841156, "learning_rate": 0.00012855395384815937, "loss": 1.8533, "step": 11884 }, { "epoch": 0.4256271599190646, "grad_norm": 1.781887173652649, "learning_rate": 0.00012854283754569127, "loss": 1.4333, "step": 11885 }, { "epoch": 0.42566297204863285, "grad_norm": 1.5997289419174194, "learning_rate": 0.00012853172085921613, "loss": 1.4675, "step": 11886 }, { "epoch": 0.4256987841782012, "grad_norm": 1.4423165321350098, "learning_rate": 0.00012852060378888347, "loss": 1.6605, "step": 11887 }, { "epoch": 0.42573459630776944, "grad_norm": 2.1727519035339355, "learning_rate": 0.00012850948633484288, "loss": 1.4342, "step": 11888 }, { "epoch": 0.4257704084373377, "grad_norm": 1.8773438930511475, "learning_rate": 0.00012849836849724392, "loss": 1.2294, "step": 11889 }, { "epoch": 0.42580622056690604, "grad_norm": 1.5362021923065186, "learning_rate": 0.0001284872502762362, "loss": 1.6375, "step": 11890 }, { "epoch": 0.4258420326964743, "grad_norm": 2.041426658630371, "learning_rate": 0.00012847613167196923, "loss": 1.4472, "step": 11891 }, { "epoch": 0.4258778448260426, "grad_norm": 1.3776212930679321, "learning_rate": 0.00012846501268459266, "loss": 1.2878, "step": 11892 }, { "epoch": 0.42591365695561084, "grad_norm": 1.8141348361968994, "learning_rate": 0.00012845389331425606, "loss": 1.2643, "step": 11893 }, { "epoch": 0.42594946908517917, "grad_norm": 1.4836647510528564, "learning_rate": 0.00012844277356110906, "loss": 1.6488, "step": 11894 }, { "epoch": 0.42598528121474744, "grad_norm": 1.3631209135055542, "learning_rate": 0.0001284316534253012, "loss": 1.3385, "step": 11895 }, { "epoch": 0.4260210933443157, "grad_norm": 2.604870319366455, "learning_rate": 0.0001284205329069821, "loss": 1.5673, "step": 11896 }, { "epoch": 0.42605690547388403, "grad_norm": 1.4967273473739624, "learning_rate": 0.00012840941200630143, "loss": 1.5662, "step": 11897 }, { "epoch": 0.4260927176034523, "grad_norm": 1.547292709350586, "learning_rate": 0.00012839829072340875, "loss": 1.4669, "step": 11898 }, { "epoch": 0.42612852973302057, "grad_norm": 1.751133918762207, "learning_rate": 0.0001283871690584537, "loss": 1.4935, "step": 11899 }, { "epoch": 0.42616434186258884, "grad_norm": 1.6280288696289062, "learning_rate": 0.00012837604701158587, "loss": 1.7348, "step": 11900 }, { "epoch": 0.42620015399215716, "grad_norm": 1.4411042928695679, "learning_rate": 0.000128364924582955, "loss": 1.5411, "step": 11901 }, { "epoch": 0.42623596612172543, "grad_norm": 1.841700553894043, "learning_rate": 0.00012835380177271058, "loss": 1.4303, "step": 11902 }, { "epoch": 0.4262717782512937, "grad_norm": 1.4292374849319458, "learning_rate": 0.00012834267858100238, "loss": 1.3253, "step": 11903 }, { "epoch": 0.426307590380862, "grad_norm": 1.8277291059494019, "learning_rate": 0.00012833155500798003, "loss": 1.5402, "step": 11904 }, { "epoch": 0.4263434025104303, "grad_norm": 1.7723889350891113, "learning_rate": 0.0001283204310537931, "loss": 1.5183, "step": 11905 }, { "epoch": 0.42637921463999856, "grad_norm": 1.554281234741211, "learning_rate": 0.00012830930671859132, "loss": 1.3464, "step": 11906 }, { "epoch": 0.42641502676956683, "grad_norm": 1.9932013750076294, "learning_rate": 0.00012829818200252432, "loss": 1.584, "step": 11907 }, { "epoch": 0.42645083889913515, "grad_norm": 1.3307794332504272, "learning_rate": 0.0001282870569057418, "loss": 1.5602, "step": 11908 }, { "epoch": 0.4264866510287034, "grad_norm": 1.5475578308105469, "learning_rate": 0.0001282759314283934, "loss": 1.3701, "step": 11909 }, { "epoch": 0.4265224631582717, "grad_norm": 1.49881911277771, "learning_rate": 0.00012826480557062884, "loss": 1.339, "step": 11910 }, { "epoch": 0.42655827528784, "grad_norm": 2.00813364982605, "learning_rate": 0.00012825367933259774, "loss": 1.5767, "step": 11911 }, { "epoch": 0.4265940874174083, "grad_norm": 1.6309877634048462, "learning_rate": 0.00012824255271444987, "loss": 1.5292, "step": 11912 }, { "epoch": 0.42662989954697655, "grad_norm": 1.6364531517028809, "learning_rate": 0.00012823142571633488, "loss": 1.6134, "step": 11913 }, { "epoch": 0.4266657116765448, "grad_norm": 1.7823848724365234, "learning_rate": 0.00012822029833840245, "loss": 1.3748, "step": 11914 }, { "epoch": 0.42670152380611315, "grad_norm": 1.6810747385025024, "learning_rate": 0.00012820917058080234, "loss": 1.6998, "step": 11915 }, { "epoch": 0.4267373359356814, "grad_norm": 1.9501549005508423, "learning_rate": 0.0001281980424436842, "loss": 1.5711, "step": 11916 }, { "epoch": 0.4267731480652497, "grad_norm": 1.8748064041137695, "learning_rate": 0.00012818691392719778, "loss": 1.5153, "step": 11917 }, { "epoch": 0.426808960194818, "grad_norm": 1.5978293418884277, "learning_rate": 0.00012817578503149276, "loss": 1.6448, "step": 11918 }, { "epoch": 0.4268447723243863, "grad_norm": 1.8714993000030518, "learning_rate": 0.00012816465575671895, "loss": 1.4063, "step": 11919 }, { "epoch": 0.42688058445395455, "grad_norm": 1.4037456512451172, "learning_rate": 0.000128153526103026, "loss": 1.5125, "step": 11920 }, { "epoch": 0.4269163965835228, "grad_norm": 1.7890756130218506, "learning_rate": 0.00012814239607056367, "loss": 1.3303, "step": 11921 }, { "epoch": 0.42695220871309114, "grad_norm": 2.235222578048706, "learning_rate": 0.00012813126565948171, "loss": 1.1991, "step": 11922 }, { "epoch": 0.4269880208426594, "grad_norm": 2.393725633621216, "learning_rate": 0.00012812013486992985, "loss": 1.7267, "step": 11923 }, { "epoch": 0.4270238329722277, "grad_norm": 1.7477926015853882, "learning_rate": 0.00012810900370205784, "loss": 1.7094, "step": 11924 }, { "epoch": 0.427059645101796, "grad_norm": 1.6497920751571655, "learning_rate": 0.00012809787215601546, "loss": 1.6286, "step": 11925 }, { "epoch": 0.4270954572313643, "grad_norm": 1.7119648456573486, "learning_rate": 0.00012808674023195244, "loss": 1.5985, "step": 11926 }, { "epoch": 0.42713126936093254, "grad_norm": 1.2613149881362915, "learning_rate": 0.00012807560793001856, "loss": 1.426, "step": 11927 }, { "epoch": 0.4271670814905008, "grad_norm": 1.6869969367980957, "learning_rate": 0.0001280644752503636, "loss": 1.4147, "step": 11928 }, { "epoch": 0.42720289362006914, "grad_norm": 1.247328758239746, "learning_rate": 0.00012805334219313734, "loss": 1.5805, "step": 11929 }, { "epoch": 0.4272387057496374, "grad_norm": 2.01601243019104, "learning_rate": 0.00012804220875848953, "loss": 1.4124, "step": 11930 }, { "epoch": 0.4272745178792057, "grad_norm": 2.005664587020874, "learning_rate": 0.00012803107494657, "loss": 1.4524, "step": 11931 }, { "epoch": 0.427310330008774, "grad_norm": 2.120532274246216, "learning_rate": 0.0001280199407575285, "loss": 1.7616, "step": 11932 }, { "epoch": 0.42734614213834227, "grad_norm": 1.7293062210083008, "learning_rate": 0.0001280088061915148, "loss": 1.3992, "step": 11933 }, { "epoch": 0.42738195426791054, "grad_norm": 2.858889579772949, "learning_rate": 0.00012799767124867874, "loss": 1.418, "step": 11934 }, { "epoch": 0.4274177663974788, "grad_norm": 3.2688639163970947, "learning_rate": 0.00012798653592917017, "loss": 1.47, "step": 11935 }, { "epoch": 0.42745357852704713, "grad_norm": 1.553168773651123, "learning_rate": 0.00012797540023313882, "loss": 1.6188, "step": 11936 }, { "epoch": 0.4274893906566154, "grad_norm": 1.8382031917572021, "learning_rate": 0.0001279642641607346, "loss": 1.5762, "step": 11937 }, { "epoch": 0.42752520278618367, "grad_norm": 1.7807074785232544, "learning_rate": 0.00012795312771210726, "loss": 1.4538, "step": 11938 }, { "epoch": 0.427561014915752, "grad_norm": 1.8465766906738281, "learning_rate": 0.00012794199088740665, "loss": 1.3205, "step": 11939 }, { "epoch": 0.42759682704532026, "grad_norm": 1.5674152374267578, "learning_rate": 0.00012793085368678254, "loss": 1.5163, "step": 11940 }, { "epoch": 0.42763263917488853, "grad_norm": 1.7111066579818726, "learning_rate": 0.00012791971611038488, "loss": 1.2398, "step": 11941 }, { "epoch": 0.4276684513044568, "grad_norm": 1.754284381866455, "learning_rate": 0.00012790857815836342, "loss": 1.4891, "step": 11942 }, { "epoch": 0.4277042634340251, "grad_norm": 1.5504387617111206, "learning_rate": 0.00012789743983086807, "loss": 1.4967, "step": 11943 }, { "epoch": 0.4277400755635934, "grad_norm": 1.2893853187561035, "learning_rate": 0.00012788630112804862, "loss": 1.4526, "step": 11944 }, { "epoch": 0.42777588769316166, "grad_norm": 1.9921671152114868, "learning_rate": 0.000127875162050055, "loss": 1.4574, "step": 11945 }, { "epoch": 0.42781169982273, "grad_norm": 1.7197723388671875, "learning_rate": 0.00012786402259703697, "loss": 1.2273, "step": 11946 }, { "epoch": 0.42784751195229825, "grad_norm": 2.255772590637207, "learning_rate": 0.0001278528827691445, "loss": 1.456, "step": 11947 }, { "epoch": 0.4278833240818665, "grad_norm": 2.25209903717041, "learning_rate": 0.00012784174256652743, "loss": 1.4299, "step": 11948 }, { "epoch": 0.4279191362114348, "grad_norm": 1.667028784751892, "learning_rate": 0.0001278306019893356, "loss": 1.021, "step": 11949 }, { "epoch": 0.4279549483410031, "grad_norm": 1.792156457901001, "learning_rate": 0.00012781946103771892, "loss": 1.347, "step": 11950 }, { "epoch": 0.4279907604705714, "grad_norm": 1.6226983070373535, "learning_rate": 0.0001278083197118273, "loss": 1.6862, "step": 11951 }, { "epoch": 0.42802657260013965, "grad_norm": 1.4472019672393799, "learning_rate": 0.00012779717801181058, "loss": 1.6126, "step": 11952 }, { "epoch": 0.428062384729708, "grad_norm": 2.137946844100952, "learning_rate": 0.0001277860359378187, "loss": 1.5559, "step": 11953 }, { "epoch": 0.42809819685927625, "grad_norm": 2.2679343223571777, "learning_rate": 0.00012777489349000156, "loss": 1.5858, "step": 11954 }, { "epoch": 0.4281340089888445, "grad_norm": 1.3759374618530273, "learning_rate": 0.00012776375066850902, "loss": 1.4777, "step": 11955 }, { "epoch": 0.4281698211184128, "grad_norm": 1.530863642692566, "learning_rate": 0.00012775260747349107, "loss": 1.1968, "step": 11956 }, { "epoch": 0.4282056332479811, "grad_norm": 1.4429298639297485, "learning_rate": 0.0001277414639050976, "loss": 1.6555, "step": 11957 }, { "epoch": 0.4282414453775494, "grad_norm": 2.4273345470428467, "learning_rate": 0.00012773031996347845, "loss": 1.3997, "step": 11958 }, { "epoch": 0.42827725750711765, "grad_norm": 1.5408649444580078, "learning_rate": 0.00012771917564878367, "loss": 1.3479, "step": 11959 }, { "epoch": 0.428313069636686, "grad_norm": 1.3045849800109863, "learning_rate": 0.00012770803096116308, "loss": 1.3514, "step": 11960 }, { "epoch": 0.42834888176625424, "grad_norm": 1.5886048078536987, "learning_rate": 0.00012769688590076673, "loss": 1.3276, "step": 11961 }, { "epoch": 0.4283846938958225, "grad_norm": 1.6271336078643799, "learning_rate": 0.00012768574046774446, "loss": 1.6032, "step": 11962 }, { "epoch": 0.4284205060253908, "grad_norm": 1.9809879064559937, "learning_rate": 0.00012767459466224632, "loss": 1.6735, "step": 11963 }, { "epoch": 0.4284563181549591, "grad_norm": 1.7093204259872437, "learning_rate": 0.00012766344848442218, "loss": 1.7656, "step": 11964 }, { "epoch": 0.4284921302845274, "grad_norm": 1.7945045232772827, "learning_rate": 0.00012765230193442198, "loss": 1.5466, "step": 11965 }, { "epoch": 0.42852794241409564, "grad_norm": 2.3185229301452637, "learning_rate": 0.0001276411550123958, "loss": 1.4034, "step": 11966 }, { "epoch": 0.42856375454366397, "grad_norm": 1.5377254486083984, "learning_rate": 0.00012763000771849348, "loss": 1.7145, "step": 11967 }, { "epoch": 0.42859956667323224, "grad_norm": 1.9657632112503052, "learning_rate": 0.00012761886005286508, "loss": 1.5286, "step": 11968 }, { "epoch": 0.4286353788028005, "grad_norm": 1.495625376701355, "learning_rate": 0.0001276077120156605, "loss": 1.3016, "step": 11969 }, { "epoch": 0.4286711909323688, "grad_norm": 1.7579660415649414, "learning_rate": 0.0001275965636070298, "loss": 1.2155, "step": 11970 }, { "epoch": 0.4287070030619371, "grad_norm": 2.0565755367279053, "learning_rate": 0.00012758541482712295, "loss": 1.3118, "step": 11971 }, { "epoch": 0.42874281519150537, "grad_norm": 1.7184526920318604, "learning_rate": 0.0001275742656760899, "loss": 1.3649, "step": 11972 }, { "epoch": 0.42877862732107364, "grad_norm": 1.7197214365005493, "learning_rate": 0.00012756311615408068, "loss": 1.2731, "step": 11973 }, { "epoch": 0.42881443945064196, "grad_norm": 1.3079050779342651, "learning_rate": 0.0001275519662612453, "loss": 1.3794, "step": 11974 }, { "epoch": 0.42885025158021023, "grad_norm": 1.7446621656417847, "learning_rate": 0.00012754081599773373, "loss": 1.6582, "step": 11975 }, { "epoch": 0.4288860637097785, "grad_norm": 1.5403920412063599, "learning_rate": 0.00012752966536369603, "loss": 1.3348, "step": 11976 }, { "epoch": 0.42892187583934677, "grad_norm": 1.4818763732910156, "learning_rate": 0.00012751851435928216, "loss": 1.5335, "step": 11977 }, { "epoch": 0.4289576879689151, "grad_norm": 1.5109602212905884, "learning_rate": 0.00012750736298464216, "loss": 1.3644, "step": 11978 }, { "epoch": 0.42899350009848336, "grad_norm": 2.0222465991973877, "learning_rate": 0.00012749621123992613, "loss": 1.3843, "step": 11979 }, { "epoch": 0.42902931222805163, "grad_norm": 1.4966095685958862, "learning_rate": 0.000127485059125284, "loss": 1.337, "step": 11980 }, { "epoch": 0.42906512435761995, "grad_norm": 1.5781891345977783, "learning_rate": 0.0001274739066408659, "loss": 1.6262, "step": 11981 }, { "epoch": 0.4291009364871882, "grad_norm": 1.5741047859191895, "learning_rate": 0.0001274627537868218, "loss": 1.5799, "step": 11982 }, { "epoch": 0.4291367486167565, "grad_norm": 1.477160096168518, "learning_rate": 0.00012745160056330178, "loss": 1.658, "step": 11983 }, { "epoch": 0.42917256074632476, "grad_norm": 2.298166036605835, "learning_rate": 0.00012744044697045586, "loss": 1.9077, "step": 11984 }, { "epoch": 0.4292083728758931, "grad_norm": 1.8923615217208862, "learning_rate": 0.00012742929300843417, "loss": 1.634, "step": 11985 }, { "epoch": 0.42924418500546135, "grad_norm": 2.1138875484466553, "learning_rate": 0.00012741813867738665, "loss": 1.8653, "step": 11986 }, { "epoch": 0.4292799971350296, "grad_norm": 2.7330331802368164, "learning_rate": 0.00012740698397746352, "loss": 1.5775, "step": 11987 }, { "epoch": 0.4293158092645979, "grad_norm": 1.834524154663086, "learning_rate": 0.0001273958289088147, "loss": 1.6638, "step": 11988 }, { "epoch": 0.4293516213941662, "grad_norm": 1.6845818758010864, "learning_rate": 0.0001273846734715904, "loss": 1.3616, "step": 11989 }, { "epoch": 0.4293874335237345, "grad_norm": 1.5297510623931885, "learning_rate": 0.0001273735176659406, "loss": 1.2825, "step": 11990 }, { "epoch": 0.42942324565330275, "grad_norm": 1.622412085533142, "learning_rate": 0.00012736236149201547, "loss": 1.5248, "step": 11991 }, { "epoch": 0.4294590577828711, "grad_norm": 1.8445333242416382, "learning_rate": 0.000127351204949965, "loss": 1.6047, "step": 11992 }, { "epoch": 0.42949486991243935, "grad_norm": 1.7746164798736572, "learning_rate": 0.0001273400480399394, "loss": 1.3141, "step": 11993 }, { "epoch": 0.4295306820420076, "grad_norm": 1.7072081565856934, "learning_rate": 0.00012732889076208872, "loss": 1.5455, "step": 11994 }, { "epoch": 0.4295664941715759, "grad_norm": 1.5068244934082031, "learning_rate": 0.00012731773311656304, "loss": 1.6053, "step": 11995 }, { "epoch": 0.4296023063011442, "grad_norm": 1.6956735849380493, "learning_rate": 0.00012730657510351252, "loss": 1.6389, "step": 11996 }, { "epoch": 0.4296381184307125, "grad_norm": 1.3766717910766602, "learning_rate": 0.00012729541672308722, "loss": 1.3844, "step": 11997 }, { "epoch": 0.42967393056028075, "grad_norm": 1.9688079357147217, "learning_rate": 0.00012728425797543731, "loss": 1.5564, "step": 11998 }, { "epoch": 0.4297097426898491, "grad_norm": 1.6623727083206177, "learning_rate": 0.00012727309886071292, "loss": 1.1367, "step": 11999 }, { "epoch": 0.42974555481941734, "grad_norm": 1.2293025255203247, "learning_rate": 0.00012726193937906416, "loss": 1.4147, "step": 12000 }, { "epoch": 0.4297813669489856, "grad_norm": 2.1265058517456055, "learning_rate": 0.00012725077953064119, "loss": 1.5568, "step": 12001 }, { "epoch": 0.4298171790785539, "grad_norm": 1.5156117677688599, "learning_rate": 0.0001272396193155941, "loss": 1.3789, "step": 12002 }, { "epoch": 0.4298529912081222, "grad_norm": 2.3674230575561523, "learning_rate": 0.0001272284587340731, "loss": 1.4465, "step": 12003 }, { "epoch": 0.4298888033376905, "grad_norm": 1.604242205619812, "learning_rate": 0.00012721729778622826, "loss": 1.6652, "step": 12004 }, { "epoch": 0.42992461546725874, "grad_norm": 1.3439925909042358, "learning_rate": 0.00012720613647220985, "loss": 1.2755, "step": 12005 }, { "epoch": 0.42996042759682707, "grad_norm": 3.2509360313415527, "learning_rate": 0.0001271949747921679, "loss": 1.6501, "step": 12006 }, { "epoch": 0.42999623972639534, "grad_norm": 1.9098608493804932, "learning_rate": 0.0001271838127462527, "loss": 1.5589, "step": 12007 }, { "epoch": 0.4300320518559636, "grad_norm": 2.2342143058776855, "learning_rate": 0.0001271726503346143, "loss": 1.221, "step": 12008 }, { "epoch": 0.4300678639855319, "grad_norm": 1.2617571353912354, "learning_rate": 0.00012716148755740302, "loss": 1.5833, "step": 12009 }, { "epoch": 0.4301036761151002, "grad_norm": 1.4430612325668335, "learning_rate": 0.00012715032441476892, "loss": 1.5831, "step": 12010 }, { "epoch": 0.43013948824466847, "grad_norm": 1.642476201057434, "learning_rate": 0.00012713916090686223, "loss": 1.626, "step": 12011 }, { "epoch": 0.43017530037423674, "grad_norm": 1.5486873388290405, "learning_rate": 0.00012712799703383314, "loss": 1.4034, "step": 12012 }, { "epoch": 0.43021111250380506, "grad_norm": 2.0201334953308105, "learning_rate": 0.00012711683279583181, "loss": 1.557, "step": 12013 }, { "epoch": 0.43024692463337333, "grad_norm": 1.413825511932373, "learning_rate": 0.00012710566819300854, "loss": 1.6422, "step": 12014 }, { "epoch": 0.4302827367629416, "grad_norm": 1.2569329738616943, "learning_rate": 0.00012709450322551338, "loss": 1.4474, "step": 12015 }, { "epoch": 0.43031854889250987, "grad_norm": 1.874431848526001, "learning_rate": 0.00012708333789349671, "loss": 1.6731, "step": 12016 }, { "epoch": 0.4303543610220782, "grad_norm": 1.6098730564117432, "learning_rate": 0.00012707217219710864, "loss": 1.416, "step": 12017 }, { "epoch": 0.43039017315164646, "grad_norm": 1.6854571104049683, "learning_rate": 0.0001270610061364994, "loss": 1.4469, "step": 12018 }, { "epoch": 0.43042598528121473, "grad_norm": 1.5974562168121338, "learning_rate": 0.00012704983971181924, "loss": 1.3327, "step": 12019 }, { "epoch": 0.43046179741078305, "grad_norm": 1.5737115144729614, "learning_rate": 0.00012703867292321837, "loss": 1.779, "step": 12020 }, { "epoch": 0.4304976095403513, "grad_norm": 1.6570041179656982, "learning_rate": 0.00012702750577084704, "loss": 1.5277, "step": 12021 }, { "epoch": 0.4305334216699196, "grad_norm": 1.3756600618362427, "learning_rate": 0.00012701633825485547, "loss": 1.616, "step": 12022 }, { "epoch": 0.43056923379948786, "grad_norm": 1.5845674276351929, "learning_rate": 0.00012700517037539394, "loss": 1.7271, "step": 12023 }, { "epoch": 0.4306050459290562, "grad_norm": 1.8880497217178345, "learning_rate": 0.00012699400213261262, "loss": 1.512, "step": 12024 }, { "epoch": 0.43064085805862445, "grad_norm": 1.240716576576233, "learning_rate": 0.0001269828335266619, "loss": 1.258, "step": 12025 }, { "epoch": 0.4306766701881927, "grad_norm": 1.8755682706832886, "learning_rate": 0.00012697166455769192, "loss": 1.6694, "step": 12026 }, { "epoch": 0.43071248231776105, "grad_norm": 1.8921705484390259, "learning_rate": 0.000126960495225853, "loss": 1.3913, "step": 12027 }, { "epoch": 0.4307482944473293, "grad_norm": 1.7119035720825195, "learning_rate": 0.00012694932553129537, "loss": 1.3978, "step": 12028 }, { "epoch": 0.4307841065768976, "grad_norm": 1.7253758907318115, "learning_rate": 0.00012693815547416934, "loss": 1.5819, "step": 12029 }, { "epoch": 0.43081991870646585, "grad_norm": 1.5359183549880981, "learning_rate": 0.00012692698505462516, "loss": 1.3002, "step": 12030 }, { "epoch": 0.4308557308360342, "grad_norm": 1.8340015411376953, "learning_rate": 0.00012691581427281317, "loss": 1.536, "step": 12031 }, { "epoch": 0.43089154296560245, "grad_norm": 1.6742093563079834, "learning_rate": 0.00012690464312888357, "loss": 1.3717, "step": 12032 }, { "epoch": 0.4309273550951707, "grad_norm": 2.0416200160980225, "learning_rate": 0.0001268934716229867, "loss": 1.751, "step": 12033 }, { "epoch": 0.43096316722473904, "grad_norm": 1.9123685359954834, "learning_rate": 0.0001268822997552729, "loss": 1.5788, "step": 12034 }, { "epoch": 0.4309989793543073, "grad_norm": 1.4536606073379517, "learning_rate": 0.00012687112752589243, "loss": 1.5731, "step": 12035 }, { "epoch": 0.4310347914838756, "grad_norm": 1.5964137315750122, "learning_rate": 0.00012685995493499558, "loss": 1.2681, "step": 12036 }, { "epoch": 0.43107060361344385, "grad_norm": 1.6218794584274292, "learning_rate": 0.00012684878198273268, "loss": 1.449, "step": 12037 }, { "epoch": 0.4311064157430122, "grad_norm": 1.6466941833496094, "learning_rate": 0.00012683760866925408, "loss": 1.5735, "step": 12038 }, { "epoch": 0.43114222787258044, "grad_norm": 1.5569286346435547, "learning_rate": 0.00012682643499471003, "loss": 1.4015, "step": 12039 }, { "epoch": 0.4311780400021487, "grad_norm": 1.7250436544418335, "learning_rate": 0.00012681526095925094, "loss": 1.4717, "step": 12040 }, { "epoch": 0.43121385213171703, "grad_norm": 1.3829773664474487, "learning_rate": 0.00012680408656302707, "loss": 1.5221, "step": 12041 }, { "epoch": 0.4312496642612853, "grad_norm": 1.6153755187988281, "learning_rate": 0.00012679291180618882, "loss": 1.6274, "step": 12042 }, { "epoch": 0.4312854763908536, "grad_norm": 1.467158555984497, "learning_rate": 0.00012678173668888645, "loss": 1.4342, "step": 12043 }, { "epoch": 0.43132128852042184, "grad_norm": 1.740787148475647, "learning_rate": 0.00012677056121127042, "loss": 1.855, "step": 12044 }, { "epoch": 0.43135710064999017, "grad_norm": 1.7290964126586914, "learning_rate": 0.000126759385373491, "loss": 1.5766, "step": 12045 }, { "epoch": 0.43139291277955844, "grad_norm": 1.2313038110733032, "learning_rate": 0.00012674820917569856, "loss": 1.0122, "step": 12046 }, { "epoch": 0.4314287249091267, "grad_norm": 1.7760591506958008, "learning_rate": 0.0001267370326180435, "loss": 1.4813, "step": 12047 }, { "epoch": 0.43146453703869503, "grad_norm": 2.26284122467041, "learning_rate": 0.0001267258557006761, "loss": 1.6065, "step": 12048 }, { "epoch": 0.4315003491682633, "grad_norm": 1.742746114730835, "learning_rate": 0.00012671467842374683, "loss": 1.8581, "step": 12049 }, { "epoch": 0.43153616129783157, "grad_norm": 2.243811845779419, "learning_rate": 0.000126703500787406, "loss": 1.4152, "step": 12050 }, { "epoch": 0.43157197342739984, "grad_norm": 1.9817683696746826, "learning_rate": 0.000126692322791804, "loss": 1.4592, "step": 12051 }, { "epoch": 0.43160778555696816, "grad_norm": 1.667299509048462, "learning_rate": 0.00012668114443709124, "loss": 1.8244, "step": 12052 }, { "epoch": 0.43164359768653643, "grad_norm": 1.9919838905334473, "learning_rate": 0.0001266699657234181, "loss": 1.7562, "step": 12053 }, { "epoch": 0.4316794098161047, "grad_norm": 1.6336984634399414, "learning_rate": 0.000126658786650935, "loss": 1.5799, "step": 12054 }, { "epoch": 0.431715221945673, "grad_norm": 1.2953890562057495, "learning_rate": 0.00012664760721979227, "loss": 1.2287, "step": 12055 }, { "epoch": 0.4317510340752413, "grad_norm": 1.822380781173706, "learning_rate": 0.00012663642743014037, "loss": 1.2884, "step": 12056 }, { "epoch": 0.43178684620480956, "grad_norm": 2.05786395072937, "learning_rate": 0.0001266252472821297, "loss": 1.4525, "step": 12057 }, { "epoch": 0.43182265833437783, "grad_norm": 1.759456992149353, "learning_rate": 0.00012661406677591067, "loss": 1.2702, "step": 12058 }, { "epoch": 0.43185847046394615, "grad_norm": 1.612596035003662, "learning_rate": 0.00012660288591163373, "loss": 1.5832, "step": 12059 }, { "epoch": 0.4318942825935144, "grad_norm": 1.3318983316421509, "learning_rate": 0.00012659170468944924, "loss": 1.2931, "step": 12060 }, { "epoch": 0.4319300947230827, "grad_norm": 1.8278427124023438, "learning_rate": 0.00012658052310950767, "loss": 1.6496, "step": 12061 }, { "epoch": 0.431965906852651, "grad_norm": 1.4100539684295654, "learning_rate": 0.00012656934117195946, "loss": 1.6082, "step": 12062 }, { "epoch": 0.4320017189822193, "grad_norm": 1.8377265930175781, "learning_rate": 0.00012655815887695503, "loss": 1.5018, "step": 12063 }, { "epoch": 0.43203753111178755, "grad_norm": 1.7149466276168823, "learning_rate": 0.00012654697622464483, "loss": 1.6423, "step": 12064 }, { "epoch": 0.4320733432413558, "grad_norm": 1.707377552986145, "learning_rate": 0.0001265357932151793, "loss": 1.5497, "step": 12065 }, { "epoch": 0.43210915537092415, "grad_norm": 1.3086154460906982, "learning_rate": 0.0001265246098487089, "loss": 1.3625, "step": 12066 }, { "epoch": 0.4321449675004924, "grad_norm": 1.9298018217086792, "learning_rate": 0.0001265134261253841, "loss": 1.4781, "step": 12067 }, { "epoch": 0.4321807796300607, "grad_norm": 1.6140490770339966, "learning_rate": 0.00012650224204535535, "loss": 1.5533, "step": 12068 }, { "epoch": 0.432216591759629, "grad_norm": 1.5894731283187866, "learning_rate": 0.00012649105760877312, "loss": 1.3304, "step": 12069 }, { "epoch": 0.4322524038891973, "grad_norm": 1.940897822380066, "learning_rate": 0.00012647987281578789, "loss": 1.7295, "step": 12070 }, { "epoch": 0.43228821601876555, "grad_norm": 1.6926662921905518, "learning_rate": 0.0001264686876665501, "loss": 1.2571, "step": 12071 }, { "epoch": 0.4323240281483338, "grad_norm": 1.5556963682174683, "learning_rate": 0.00012645750216121028, "loss": 1.3275, "step": 12072 }, { "epoch": 0.43235984027790214, "grad_norm": 1.9698765277862549, "learning_rate": 0.0001264463162999189, "loss": 1.517, "step": 12073 }, { "epoch": 0.4323956524074704, "grad_norm": 1.5589221715927124, "learning_rate": 0.00012643513008282645, "loss": 1.5866, "step": 12074 }, { "epoch": 0.4324314645370387, "grad_norm": 1.5207163095474243, "learning_rate": 0.00012642394351008337, "loss": 1.5868, "step": 12075 }, { "epoch": 0.432467276666607, "grad_norm": 1.5345786809921265, "learning_rate": 0.00012641275658184026, "loss": 1.5199, "step": 12076 }, { "epoch": 0.4325030887961753, "grad_norm": 1.7944084405899048, "learning_rate": 0.00012640156929824757, "loss": 1.627, "step": 12077 }, { "epoch": 0.43253890092574354, "grad_norm": 1.890235185623169, "learning_rate": 0.00012639038165945584, "loss": 1.4801, "step": 12078 }, { "epoch": 0.4325747130553118, "grad_norm": 1.4263070821762085, "learning_rate": 0.00012637919366561556, "loss": 1.4128, "step": 12079 }, { "epoch": 0.43261052518488013, "grad_norm": 1.502324104309082, "learning_rate": 0.00012636800531687728, "loss": 1.3671, "step": 12080 }, { "epoch": 0.4326463373144484, "grad_norm": 2.5059921741485596, "learning_rate": 0.00012635681661339146, "loss": 1.7858, "step": 12081 }, { "epoch": 0.4326821494440167, "grad_norm": 1.9203873872756958, "learning_rate": 0.00012634562755530867, "loss": 1.7033, "step": 12082 }, { "epoch": 0.432717961573585, "grad_norm": 1.613374948501587, "learning_rate": 0.00012633443814277946, "loss": 1.1675, "step": 12083 }, { "epoch": 0.43275377370315327, "grad_norm": 1.6708298921585083, "learning_rate": 0.00012632324837595434, "loss": 1.5178, "step": 12084 }, { "epoch": 0.43278958583272154, "grad_norm": 1.9573644399642944, "learning_rate": 0.00012631205825498388, "loss": 1.3646, "step": 12085 }, { "epoch": 0.4328253979622898, "grad_norm": 1.6646674871444702, "learning_rate": 0.0001263008677800186, "loss": 1.2774, "step": 12086 }, { "epoch": 0.43286121009185813, "grad_norm": 1.70304536819458, "learning_rate": 0.0001262896769512091, "loss": 1.2577, "step": 12087 }, { "epoch": 0.4328970222214264, "grad_norm": 2.205632209777832, "learning_rate": 0.0001262784857687059, "loss": 1.7386, "step": 12088 }, { "epoch": 0.43293283435099467, "grad_norm": 1.4651166200637817, "learning_rate": 0.00012626729423265956, "loss": 1.5016, "step": 12089 }, { "epoch": 0.432968646480563, "grad_norm": 2.221482992172241, "learning_rate": 0.00012625610234322064, "loss": 1.6606, "step": 12090 }, { "epoch": 0.43300445861013126, "grad_norm": 1.4816288948059082, "learning_rate": 0.00012624491010053976, "loss": 1.5373, "step": 12091 }, { "epoch": 0.43304027073969953, "grad_norm": 3.446542978286743, "learning_rate": 0.00012623371750476747, "loss": 2.0685, "step": 12092 }, { "epoch": 0.4330760828692678, "grad_norm": 1.557287573814392, "learning_rate": 0.00012622252455605435, "loss": 1.4696, "step": 12093 }, { "epoch": 0.4331118949988361, "grad_norm": 1.9975833892822266, "learning_rate": 0.00012621133125455093, "loss": 1.5988, "step": 12094 }, { "epoch": 0.4331477071284044, "grad_norm": 1.311399221420288, "learning_rate": 0.0001262001376004079, "loss": 1.2518, "step": 12095 }, { "epoch": 0.43318351925797266, "grad_norm": 1.9268242120742798, "learning_rate": 0.00012618894359377585, "loss": 1.3004, "step": 12096 }, { "epoch": 0.433219331387541, "grad_norm": 1.7908586263656616, "learning_rate": 0.0001261777492348053, "loss": 1.5952, "step": 12097 }, { "epoch": 0.43325514351710925, "grad_norm": 1.4732061624526978, "learning_rate": 0.00012616655452364693, "loss": 1.6161, "step": 12098 }, { "epoch": 0.4332909556466775, "grad_norm": 1.9595246315002441, "learning_rate": 0.0001261553594604513, "loss": 1.5878, "step": 12099 }, { "epoch": 0.4333267677762458, "grad_norm": 1.7076550722122192, "learning_rate": 0.00012614416404536905, "loss": 1.602, "step": 12100 }, { "epoch": 0.4333625799058141, "grad_norm": 1.8889092206954956, "learning_rate": 0.00012613296827855078, "loss": 1.4688, "step": 12101 }, { "epoch": 0.4333983920353824, "grad_norm": 1.7788153886795044, "learning_rate": 0.0001261217721601472, "loss": 1.7163, "step": 12102 }, { "epoch": 0.43343420416495065, "grad_norm": 1.702117919921875, "learning_rate": 0.00012611057569030876, "loss": 1.3403, "step": 12103 }, { "epoch": 0.433470016294519, "grad_norm": 1.5412400960922241, "learning_rate": 0.0001260993788691863, "loss": 1.5185, "step": 12104 }, { "epoch": 0.43350582842408725, "grad_norm": 1.8668831586837769, "learning_rate": 0.00012608818169693032, "loss": 1.6909, "step": 12105 }, { "epoch": 0.4335416405536555, "grad_norm": 2.0278031826019287, "learning_rate": 0.00012607698417369152, "loss": 1.4961, "step": 12106 }, { "epoch": 0.4335774526832238, "grad_norm": 1.955277919769287, "learning_rate": 0.00012606578629962054, "loss": 1.8627, "step": 12107 }, { "epoch": 0.4336132648127921, "grad_norm": 1.6680927276611328, "learning_rate": 0.00012605458807486797, "loss": 1.3742, "step": 12108 }, { "epoch": 0.4336490769423604, "grad_norm": 1.52384352684021, "learning_rate": 0.0001260433894995846, "loss": 1.182, "step": 12109 }, { "epoch": 0.43368488907192865, "grad_norm": 1.390926718711853, "learning_rate": 0.00012603219057392097, "loss": 1.4092, "step": 12110 }, { "epoch": 0.43372070120149697, "grad_norm": 1.52166748046875, "learning_rate": 0.0001260209912980278, "loss": 1.6895, "step": 12111 }, { "epoch": 0.43375651333106524, "grad_norm": 1.4530707597732544, "learning_rate": 0.0001260097916720558, "loss": 1.0416, "step": 12112 }, { "epoch": 0.4337923254606335, "grad_norm": 1.5703519582748413, "learning_rate": 0.00012599859169615558, "loss": 2.0043, "step": 12113 }, { "epoch": 0.4338281375902018, "grad_norm": 1.6512939929962158, "learning_rate": 0.00012598739137047784, "loss": 1.549, "step": 12114 }, { "epoch": 0.4338639497197701, "grad_norm": 2.4029769897460938, "learning_rate": 0.00012597619069517328, "loss": 1.8357, "step": 12115 }, { "epoch": 0.4338997618493384, "grad_norm": 1.5254900455474854, "learning_rate": 0.00012596498967039257, "loss": 1.5647, "step": 12116 }, { "epoch": 0.43393557397890664, "grad_norm": 2.040191411972046, "learning_rate": 0.0001259537882962864, "loss": 1.4003, "step": 12117 }, { "epoch": 0.43397138610847497, "grad_norm": 1.724648118019104, "learning_rate": 0.0001259425865730055, "loss": 1.3143, "step": 12118 }, { "epoch": 0.43400719823804323, "grad_norm": 1.1972649097442627, "learning_rate": 0.00012593138450070056, "loss": 1.3465, "step": 12119 }, { "epoch": 0.4340430103676115, "grad_norm": 1.4744864702224731, "learning_rate": 0.0001259201820795223, "loss": 1.3086, "step": 12120 }, { "epoch": 0.4340788224971798, "grad_norm": 1.8176345825195312, "learning_rate": 0.00012590897930962142, "loss": 1.3817, "step": 12121 }, { "epoch": 0.4341146346267481, "grad_norm": 1.617890477180481, "learning_rate": 0.00012589777619114863, "loss": 1.7052, "step": 12122 }, { "epoch": 0.43415044675631637, "grad_norm": 2.1249704360961914, "learning_rate": 0.0001258865727242547, "loss": 1.4104, "step": 12123 }, { "epoch": 0.43418625888588464, "grad_norm": 1.825442910194397, "learning_rate": 0.00012587536890909033, "loss": 1.6887, "step": 12124 }, { "epoch": 0.43422207101545296, "grad_norm": 1.7071682214736938, "learning_rate": 0.0001258641647458062, "loss": 1.5086, "step": 12125 }, { "epoch": 0.43425788314502123, "grad_norm": 1.7802238464355469, "learning_rate": 0.00012585296023455314, "loss": 1.6946, "step": 12126 }, { "epoch": 0.4342936952745895, "grad_norm": 1.907939076423645, "learning_rate": 0.00012584175537548183, "loss": 1.3304, "step": 12127 }, { "epoch": 0.43432950740415777, "grad_norm": 1.8659141063690186, "learning_rate": 0.00012583055016874303, "loss": 1.4764, "step": 12128 }, { "epoch": 0.4343653195337261, "grad_norm": 1.5259766578674316, "learning_rate": 0.00012581934461448747, "loss": 1.5803, "step": 12129 }, { "epoch": 0.43440113166329436, "grad_norm": 2.2665178775787354, "learning_rate": 0.00012580813871286597, "loss": 1.5027, "step": 12130 }, { "epoch": 0.43443694379286263, "grad_norm": 1.9262688159942627, "learning_rate": 0.00012579693246402924, "loss": 1.5477, "step": 12131 }, { "epoch": 0.43447275592243095, "grad_norm": 2.344991683959961, "learning_rate": 0.00012578572586812806, "loss": 1.3432, "step": 12132 }, { "epoch": 0.4345085680519992, "grad_norm": 2.302216053009033, "learning_rate": 0.00012577451892531322, "loss": 1.5703, "step": 12133 }, { "epoch": 0.4345443801815675, "grad_norm": 1.6896110773086548, "learning_rate": 0.00012576331163573548, "loss": 1.3081, "step": 12134 }, { "epoch": 0.43458019231113576, "grad_norm": 1.8697706460952759, "learning_rate": 0.00012575210399954557, "loss": 1.3996, "step": 12135 }, { "epoch": 0.4346160044407041, "grad_norm": 1.9721256494522095, "learning_rate": 0.00012574089601689433, "loss": 1.153, "step": 12136 }, { "epoch": 0.43465181657027235, "grad_norm": 2.7742340564727783, "learning_rate": 0.00012572968768793257, "loss": 1.7565, "step": 12137 }, { "epoch": 0.4346876286998406, "grad_norm": 2.0509705543518066, "learning_rate": 0.00012571847901281103, "loss": 1.3774, "step": 12138 }, { "epoch": 0.43472344082940895, "grad_norm": 1.5328233242034912, "learning_rate": 0.0001257072699916805, "loss": 1.3377, "step": 12139 }, { "epoch": 0.4347592529589772, "grad_norm": 1.7979052066802979, "learning_rate": 0.00012569606062469186, "loss": 1.544, "step": 12140 }, { "epoch": 0.4347950650885455, "grad_norm": 1.7817195653915405, "learning_rate": 0.00012568485091199585, "loss": 1.3429, "step": 12141 }, { "epoch": 0.43483087721811375, "grad_norm": 1.9444987773895264, "learning_rate": 0.0001256736408537433, "loss": 1.6757, "step": 12142 }, { "epoch": 0.4348666893476821, "grad_norm": 1.5094964504241943, "learning_rate": 0.00012566243045008504, "loss": 1.6575, "step": 12143 }, { "epoch": 0.43490250147725035, "grad_norm": 2.030996322631836, "learning_rate": 0.0001256512197011719, "loss": 1.636, "step": 12144 }, { "epoch": 0.4349383136068186, "grad_norm": 1.5814388990402222, "learning_rate": 0.00012564000860715464, "loss": 1.5221, "step": 12145 }, { "epoch": 0.43497412573638694, "grad_norm": 3.0495545864105225, "learning_rate": 0.00012562879716818416, "loss": 1.6251, "step": 12146 }, { "epoch": 0.4350099378659552, "grad_norm": 1.359506368637085, "learning_rate": 0.00012561758538441126, "loss": 1.3825, "step": 12147 }, { "epoch": 0.4350457499955235, "grad_norm": 1.534612774848938, "learning_rate": 0.0001256063732559868, "loss": 1.283, "step": 12148 }, { "epoch": 0.43508156212509175, "grad_norm": 1.858231544494629, "learning_rate": 0.0001255951607830616, "loss": 1.363, "step": 12149 }, { "epoch": 0.43511737425466007, "grad_norm": 1.4661773443222046, "learning_rate": 0.00012558394796578656, "loss": 1.3254, "step": 12150 }, { "epoch": 0.43515318638422834, "grad_norm": 1.5293601751327515, "learning_rate": 0.0001255727348043125, "loss": 1.7608, "step": 12151 }, { "epoch": 0.4351889985137966, "grad_norm": 1.2543964385986328, "learning_rate": 0.00012556152129879027, "loss": 1.5267, "step": 12152 }, { "epoch": 0.43522481064336493, "grad_norm": 1.9496577978134155, "learning_rate": 0.00012555030744937075, "loss": 1.2079, "step": 12153 }, { "epoch": 0.4352606227729332, "grad_norm": 2.417306661605835, "learning_rate": 0.0001255390932562048, "loss": 1.3698, "step": 12154 }, { "epoch": 0.4352964349025015, "grad_norm": 1.6135632991790771, "learning_rate": 0.00012552787871944327, "loss": 1.6963, "step": 12155 }, { "epoch": 0.43533224703206974, "grad_norm": 2.258427619934082, "learning_rate": 0.00012551666383923705, "loss": 1.4545, "step": 12156 }, { "epoch": 0.43536805916163807, "grad_norm": 1.6863501071929932, "learning_rate": 0.00012550544861573707, "loss": 1.113, "step": 12157 }, { "epoch": 0.43540387129120633, "grad_norm": 1.6821973323822021, "learning_rate": 0.0001254942330490942, "loss": 1.6309, "step": 12158 }, { "epoch": 0.4354396834207746, "grad_norm": 1.7726070880889893, "learning_rate": 0.00012548301713945925, "loss": 1.5211, "step": 12159 }, { "epoch": 0.43547549555034293, "grad_norm": 1.206180453300476, "learning_rate": 0.00012547180088698322, "loss": 1.4238, "step": 12160 }, { "epoch": 0.4355113076799112, "grad_norm": 2.325413227081299, "learning_rate": 0.00012546058429181692, "loss": 1.8346, "step": 12161 }, { "epoch": 0.43554711980947947, "grad_norm": 1.9589569568634033, "learning_rate": 0.00012544936735411135, "loss": 1.4175, "step": 12162 }, { "epoch": 0.43558293193904773, "grad_norm": 2.0499401092529297, "learning_rate": 0.00012543815007401733, "loss": 1.6312, "step": 12163 }, { "epoch": 0.43561874406861606, "grad_norm": 1.615285873413086, "learning_rate": 0.00012542693245168584, "loss": 1.5656, "step": 12164 }, { "epoch": 0.43565455619818433, "grad_norm": 2.342385768890381, "learning_rate": 0.00012541571448726775, "loss": 1.3668, "step": 12165 }, { "epoch": 0.4356903683277526, "grad_norm": 2.1468067169189453, "learning_rate": 0.00012540449618091403, "loss": 1.5807, "step": 12166 }, { "epoch": 0.4357261804573209, "grad_norm": 1.7686083316802979, "learning_rate": 0.00012539327753277555, "loss": 1.7541, "step": 12167 }, { "epoch": 0.4357619925868892, "grad_norm": 1.9072706699371338, "learning_rate": 0.00012538205854300334, "loss": 1.6316, "step": 12168 }, { "epoch": 0.43579780471645746, "grad_norm": 1.3254297971725464, "learning_rate": 0.00012537083921174822, "loss": 1.516, "step": 12169 }, { "epoch": 0.43583361684602573, "grad_norm": 1.3646912574768066, "learning_rate": 0.0001253596195391612, "loss": 1.3667, "step": 12170 }, { "epoch": 0.43586942897559405, "grad_norm": 1.3135740756988525, "learning_rate": 0.0001253483995253932, "loss": 1.669, "step": 12171 }, { "epoch": 0.4359052411051623, "grad_norm": 1.6707199811935425, "learning_rate": 0.00012533717917059516, "loss": 1.4503, "step": 12172 }, { "epoch": 0.4359410532347306, "grad_norm": 1.645582675933838, "learning_rate": 0.0001253259584749181, "loss": 1.3948, "step": 12173 }, { "epoch": 0.4359768653642989, "grad_norm": 1.7216689586639404, "learning_rate": 0.0001253147374385129, "loss": 1.4025, "step": 12174 }, { "epoch": 0.4360126774938672, "grad_norm": 1.6870415210723877, "learning_rate": 0.0001253035160615306, "loss": 1.7272, "step": 12175 }, { "epoch": 0.43604848962343545, "grad_norm": 1.3931629657745361, "learning_rate": 0.00012529229434412212, "loss": 1.3757, "step": 12176 }, { "epoch": 0.4360843017530037, "grad_norm": 1.6886948347091675, "learning_rate": 0.00012528107228643843, "loss": 1.4498, "step": 12177 }, { "epoch": 0.43612011388257205, "grad_norm": 1.4105387926101685, "learning_rate": 0.00012526984988863054, "loss": 1.5208, "step": 12178 }, { "epoch": 0.4361559260121403, "grad_norm": 2.1833765506744385, "learning_rate": 0.0001252586271508494, "loss": 1.7194, "step": 12179 }, { "epoch": 0.4361917381417086, "grad_norm": 1.9593642950057983, "learning_rate": 0.000125247404073246, "loss": 1.5396, "step": 12180 }, { "epoch": 0.43622755027127685, "grad_norm": 2.019223690032959, "learning_rate": 0.0001252361806559714, "loss": 1.6001, "step": 12181 }, { "epoch": 0.4362633624008452, "grad_norm": 2.205815076828003, "learning_rate": 0.00012522495689917647, "loss": 1.5243, "step": 12182 }, { "epoch": 0.43629917453041345, "grad_norm": 1.9048100709915161, "learning_rate": 0.00012521373280301233, "loss": 1.6801, "step": 12183 }, { "epoch": 0.4363349866599817, "grad_norm": 1.4723803997039795, "learning_rate": 0.0001252025083676299, "loss": 1.1884, "step": 12184 }, { "epoch": 0.43637079878955004, "grad_norm": 1.567564845085144, "learning_rate": 0.00012519128359318027, "loss": 1.4242, "step": 12185 }, { "epoch": 0.4364066109191183, "grad_norm": 1.5921801328659058, "learning_rate": 0.0001251800584798144, "loss": 1.4445, "step": 12186 }, { "epoch": 0.4364424230486866, "grad_norm": 2.2769577503204346, "learning_rate": 0.0001251688330276833, "loss": 1.5012, "step": 12187 }, { "epoch": 0.43647823517825485, "grad_norm": 1.4964098930358887, "learning_rate": 0.00012515760723693807, "loss": 1.7067, "step": 12188 }, { "epoch": 0.43651404730782317, "grad_norm": 1.4439140558242798, "learning_rate": 0.00012514638110772963, "loss": 1.1494, "step": 12189 }, { "epoch": 0.43654985943739144, "grad_norm": 1.3597850799560547, "learning_rate": 0.0001251351546402091, "loss": 1.672, "step": 12190 }, { "epoch": 0.4365856715669597, "grad_norm": 1.3259572982788086, "learning_rate": 0.00012512392783452746, "loss": 1.47, "step": 12191 }, { "epoch": 0.43662148369652803, "grad_norm": 1.7703850269317627, "learning_rate": 0.00012511270069083582, "loss": 1.4195, "step": 12192 }, { "epoch": 0.4366572958260963, "grad_norm": 1.2541024684906006, "learning_rate": 0.00012510147320928515, "loss": 1.3005, "step": 12193 }, { "epoch": 0.4366931079556646, "grad_norm": 1.4400556087493896, "learning_rate": 0.00012509024539002653, "loss": 1.1587, "step": 12194 }, { "epoch": 0.43672892008523284, "grad_norm": 1.3740122318267822, "learning_rate": 0.00012507901723321106, "loss": 1.5497, "step": 12195 }, { "epoch": 0.43676473221480117, "grad_norm": 1.596411108970642, "learning_rate": 0.00012506778873898976, "loss": 1.5982, "step": 12196 }, { "epoch": 0.43680054434436943, "grad_norm": 1.8824422359466553, "learning_rate": 0.00012505655990751368, "loss": 1.9263, "step": 12197 }, { "epoch": 0.4368363564739377, "grad_norm": 1.811933159828186, "learning_rate": 0.0001250453307389339, "loss": 1.6699, "step": 12198 }, { "epoch": 0.43687216860350603, "grad_norm": 1.7005141973495483, "learning_rate": 0.0001250341012334015, "loss": 1.6277, "step": 12199 }, { "epoch": 0.4369079807330743, "grad_norm": 1.5186580419540405, "learning_rate": 0.00012502287139106756, "loss": 1.174, "step": 12200 }, { "epoch": 0.43694379286264257, "grad_norm": 1.7548470497131348, "learning_rate": 0.0001250116412120832, "loss": 1.6404, "step": 12201 }, { "epoch": 0.43697960499221083, "grad_norm": 1.803139090538025, "learning_rate": 0.00012500041069659943, "loss": 1.2575, "step": 12202 }, { "epoch": 0.43701541712177916, "grad_norm": 1.5411206483840942, "learning_rate": 0.00012498917984476738, "loss": 1.5819, "step": 12203 }, { "epoch": 0.43705122925134743, "grad_norm": 1.999428391456604, "learning_rate": 0.00012497794865673817, "loss": 1.2803, "step": 12204 }, { "epoch": 0.4370870413809157, "grad_norm": 1.7252352237701416, "learning_rate": 0.0001249667171326629, "loss": 1.7101, "step": 12205 }, { "epoch": 0.437122853510484, "grad_norm": 1.4021079540252686, "learning_rate": 0.0001249554852726926, "loss": 1.3464, "step": 12206 }, { "epoch": 0.4371586656400523, "grad_norm": 1.7048399448394775, "learning_rate": 0.00012494425307697847, "loss": 1.5214, "step": 12207 }, { "epoch": 0.43719447776962056, "grad_norm": 1.2630609273910522, "learning_rate": 0.0001249330205456716, "loss": 1.5876, "step": 12208 }, { "epoch": 0.43723028989918883, "grad_norm": 1.411924958229065, "learning_rate": 0.00012492178767892307, "loss": 1.4632, "step": 12209 }, { "epoch": 0.43726610202875715, "grad_norm": 1.7386451959609985, "learning_rate": 0.00012491055447688405, "loss": 1.4687, "step": 12210 }, { "epoch": 0.4373019141583254, "grad_norm": 1.9168484210968018, "learning_rate": 0.00012489932093970568, "loss": 1.6312, "step": 12211 }, { "epoch": 0.4373377262878937, "grad_norm": 2.0553414821624756, "learning_rate": 0.00012488808706753902, "loss": 1.7418, "step": 12212 }, { "epoch": 0.437373538417462, "grad_norm": 1.310865879058838, "learning_rate": 0.00012487685286053526, "loss": 1.6169, "step": 12213 }, { "epoch": 0.4374093505470303, "grad_norm": 1.4720237255096436, "learning_rate": 0.00012486561831884552, "loss": 1.11, "step": 12214 }, { "epoch": 0.43744516267659855, "grad_norm": 1.5787687301635742, "learning_rate": 0.000124854383442621, "loss": 1.6203, "step": 12215 }, { "epoch": 0.4374809748061668, "grad_norm": 1.404030203819275, "learning_rate": 0.00012484314823201276, "loss": 1.2607, "step": 12216 }, { "epoch": 0.43751678693573515, "grad_norm": 1.9852256774902344, "learning_rate": 0.00012483191268717207, "loss": 1.6324, "step": 12217 }, { "epoch": 0.4375525990653034, "grad_norm": 1.8439569473266602, "learning_rate": 0.00012482067680824998, "loss": 1.3118, "step": 12218 }, { "epoch": 0.4375884111948717, "grad_norm": 1.3990365266799927, "learning_rate": 0.0001248094405953977, "loss": 1.4231, "step": 12219 }, { "epoch": 0.43762422332444, "grad_norm": 1.943037748336792, "learning_rate": 0.00012479820404876643, "loss": 1.4883, "step": 12220 }, { "epoch": 0.4376600354540083, "grad_norm": 1.4832576513290405, "learning_rate": 0.0001247869671685073, "loss": 1.3203, "step": 12221 }, { "epoch": 0.43769584758357655, "grad_norm": 1.6420166492462158, "learning_rate": 0.0001247757299547715, "loss": 1.254, "step": 12222 }, { "epoch": 0.4377316597131448, "grad_norm": 2.4458463191986084, "learning_rate": 0.00012476449240771023, "loss": 1.5143, "step": 12223 }, { "epoch": 0.43776747184271314, "grad_norm": 1.7896454334259033, "learning_rate": 0.0001247532545274746, "loss": 1.7974, "step": 12224 }, { "epoch": 0.4378032839722814, "grad_norm": 1.4857879877090454, "learning_rate": 0.00012474201631421588, "loss": 1.3927, "step": 12225 }, { "epoch": 0.4378390961018497, "grad_norm": 2.298076629638672, "learning_rate": 0.00012473077776808527, "loss": 1.5126, "step": 12226 }, { "epoch": 0.437874908231418, "grad_norm": 1.8825565576553345, "learning_rate": 0.00012471953888923393, "loss": 1.4028, "step": 12227 }, { "epoch": 0.43791072036098627, "grad_norm": 1.3885728120803833, "learning_rate": 0.00012470829967781307, "loss": 1.3174, "step": 12228 }, { "epoch": 0.43794653249055454, "grad_norm": 2.4574766159057617, "learning_rate": 0.00012469706013397395, "loss": 1.7062, "step": 12229 }, { "epoch": 0.4379823446201228, "grad_norm": 1.4123119115829468, "learning_rate": 0.00012468582025786774, "loss": 1.656, "step": 12230 }, { "epoch": 0.43801815674969113, "grad_norm": 1.421113133430481, "learning_rate": 0.0001246745800496456, "loss": 1.444, "step": 12231 }, { "epoch": 0.4380539688792594, "grad_norm": 2.3142082691192627, "learning_rate": 0.00012466333950945889, "loss": 1.6337, "step": 12232 }, { "epoch": 0.4380897810088277, "grad_norm": 2.1670494079589844, "learning_rate": 0.0001246520986374587, "loss": 1.3216, "step": 12233 }, { "epoch": 0.438125593138396, "grad_norm": 1.628150463104248, "learning_rate": 0.00012464085743379635, "loss": 1.4509, "step": 12234 }, { "epoch": 0.43816140526796427, "grad_norm": 1.204681634902954, "learning_rate": 0.000124629615898623, "loss": 1.4405, "step": 12235 }, { "epoch": 0.43819721739753253, "grad_norm": 1.9603196382522583, "learning_rate": 0.00012461837403209, "loss": 1.7334, "step": 12236 }, { "epoch": 0.4382330295271008, "grad_norm": 1.7064441442489624, "learning_rate": 0.0001246071318343485, "loss": 1.3215, "step": 12237 }, { "epoch": 0.43826884165666913, "grad_norm": 2.009902238845825, "learning_rate": 0.0001245958893055498, "loss": 1.3249, "step": 12238 }, { "epoch": 0.4383046537862374, "grad_norm": 1.3667140007019043, "learning_rate": 0.00012458464644584516, "loss": 1.254, "step": 12239 }, { "epoch": 0.43834046591580567, "grad_norm": 1.28581964969635, "learning_rate": 0.00012457340325538576, "loss": 1.4058, "step": 12240 }, { "epoch": 0.438376278045374, "grad_norm": 1.8916394710540771, "learning_rate": 0.00012456215973432295, "loss": 1.2694, "step": 12241 }, { "epoch": 0.43841209017494226, "grad_norm": 1.4369720220565796, "learning_rate": 0.00012455091588280793, "loss": 1.7439, "step": 12242 }, { "epoch": 0.43844790230451053, "grad_norm": 1.723207712173462, "learning_rate": 0.00012453967170099204, "loss": 1.3808, "step": 12243 }, { "epoch": 0.4384837144340788, "grad_norm": 2.0032958984375, "learning_rate": 0.00012452842718902647, "loss": 1.5483, "step": 12244 }, { "epoch": 0.4385195265636471, "grad_norm": 1.5736902952194214, "learning_rate": 0.00012451718234706262, "loss": 1.3339, "step": 12245 }, { "epoch": 0.4385553386932154, "grad_norm": 1.6149911880493164, "learning_rate": 0.00012450593717525167, "loss": 1.3523, "step": 12246 }, { "epoch": 0.43859115082278366, "grad_norm": 2.623713254928589, "learning_rate": 0.00012449469167374498, "loss": 1.7543, "step": 12247 }, { "epoch": 0.438626962952352, "grad_norm": 1.808893084526062, "learning_rate": 0.00012448344584269379, "loss": 1.5875, "step": 12248 }, { "epoch": 0.43866277508192025, "grad_norm": 1.4351035356521606, "learning_rate": 0.0001244721996822494, "loss": 1.4845, "step": 12249 }, { "epoch": 0.4386985872114885, "grad_norm": 1.4466192722320557, "learning_rate": 0.00012446095319256314, "loss": 1.476, "step": 12250 }, { "epoch": 0.4387343993410568, "grad_norm": 1.7350643873214722, "learning_rate": 0.00012444970637378631, "loss": 1.4816, "step": 12251 }, { "epoch": 0.4387702114706251, "grad_norm": 1.5797699689865112, "learning_rate": 0.0001244384592260702, "loss": 1.4879, "step": 12252 }, { "epoch": 0.4388060236001934, "grad_norm": 1.4239426851272583, "learning_rate": 0.00012442721174956616, "loss": 1.5366, "step": 12253 }, { "epoch": 0.43884183572976165, "grad_norm": 1.4414507150650024, "learning_rate": 0.0001244159639444255, "loss": 1.6121, "step": 12254 }, { "epoch": 0.43887764785933, "grad_norm": 1.5636897087097168, "learning_rate": 0.00012440471581079952, "loss": 1.758, "step": 12255 }, { "epoch": 0.43891345998889825, "grad_norm": 1.4484236240386963, "learning_rate": 0.0001243934673488396, "loss": 1.4626, "step": 12256 }, { "epoch": 0.4389492721184665, "grad_norm": 1.4698162078857422, "learning_rate": 0.00012438221855869702, "loss": 1.4597, "step": 12257 }, { "epoch": 0.4389850842480348, "grad_norm": 1.6307086944580078, "learning_rate": 0.00012437096944052317, "loss": 1.5859, "step": 12258 }, { "epoch": 0.4390208963776031, "grad_norm": 1.7705332040786743, "learning_rate": 0.0001243597199944693, "loss": 1.4369, "step": 12259 }, { "epoch": 0.4390567085071714, "grad_norm": 1.8133080005645752, "learning_rate": 0.0001243484702206869, "loss": 1.2604, "step": 12260 }, { "epoch": 0.43909252063673965, "grad_norm": 1.4457899332046509, "learning_rate": 0.00012433722011932717, "loss": 1.3294, "step": 12261 }, { "epoch": 0.43912833276630797, "grad_norm": 1.6493401527404785, "learning_rate": 0.00012432596969054157, "loss": 1.7194, "step": 12262 }, { "epoch": 0.43916414489587624, "grad_norm": 1.8156285285949707, "learning_rate": 0.0001243147189344814, "loss": 1.4789, "step": 12263 }, { "epoch": 0.4391999570254445, "grad_norm": 1.2668172121047974, "learning_rate": 0.0001243034678512981, "loss": 1.5012, "step": 12264 }, { "epoch": 0.4392357691550128, "grad_norm": 1.7122809886932373, "learning_rate": 0.00012429221644114294, "loss": 1.7531, "step": 12265 }, { "epoch": 0.4392715812845811, "grad_norm": 1.553624153137207, "learning_rate": 0.00012428096470416738, "loss": 1.2313, "step": 12266 }, { "epoch": 0.43930739341414937, "grad_norm": 2.247083902359009, "learning_rate": 0.00012426971264052275, "loss": 1.3864, "step": 12267 }, { "epoch": 0.43934320554371764, "grad_norm": 1.7876657247543335, "learning_rate": 0.00012425846025036042, "loss": 1.4111, "step": 12268 }, { "epoch": 0.43937901767328597, "grad_norm": 1.5700833797454834, "learning_rate": 0.0001242472075338318, "loss": 1.5301, "step": 12269 }, { "epoch": 0.43941482980285423, "grad_norm": 1.5357482433319092, "learning_rate": 0.0001242359544910883, "loss": 1.5518, "step": 12270 }, { "epoch": 0.4394506419324225, "grad_norm": 1.4696317911148071, "learning_rate": 0.00012422470112228125, "loss": 1.6317, "step": 12271 }, { "epoch": 0.43948645406199077, "grad_norm": 1.7287973165512085, "learning_rate": 0.00012421344742756215, "loss": 1.3712, "step": 12272 }, { "epoch": 0.4395222661915591, "grad_norm": 3.024156093597412, "learning_rate": 0.00012420219340708236, "loss": 1.6948, "step": 12273 }, { "epoch": 0.43955807832112737, "grad_norm": 1.4425593614578247, "learning_rate": 0.00012419093906099323, "loss": 1.5203, "step": 12274 }, { "epoch": 0.43959389045069563, "grad_norm": 1.4806897640228271, "learning_rate": 0.00012417968438944622, "loss": 1.3722, "step": 12275 }, { "epoch": 0.43962970258026396, "grad_norm": 2.641904354095459, "learning_rate": 0.0001241684293925928, "loss": 1.5145, "step": 12276 }, { "epoch": 0.43966551470983223, "grad_norm": 1.801173448562622, "learning_rate": 0.00012415717407058427, "loss": 1.5123, "step": 12277 }, { "epoch": 0.4397013268394005, "grad_norm": 1.3477219343185425, "learning_rate": 0.00012414591842357215, "loss": 1.2466, "step": 12278 }, { "epoch": 0.43973713896896877, "grad_norm": 1.4272079467773438, "learning_rate": 0.00012413466245170783, "loss": 1.1956, "step": 12279 }, { "epoch": 0.4397729510985371, "grad_norm": 1.5467042922973633, "learning_rate": 0.0001241234061551428, "loss": 1.3165, "step": 12280 }, { "epoch": 0.43980876322810536, "grad_norm": 2.0391323566436768, "learning_rate": 0.00012411214953402842, "loss": 1.5702, "step": 12281 }, { "epoch": 0.43984457535767363, "grad_norm": 1.9930360317230225, "learning_rate": 0.00012410089258851618, "loss": 1.6248, "step": 12282 }, { "epoch": 0.43988038748724195, "grad_norm": 1.4755240678787231, "learning_rate": 0.00012408963531875753, "loss": 1.6192, "step": 12283 }, { "epoch": 0.4399161996168102, "grad_norm": 1.4436578750610352, "learning_rate": 0.00012407837772490389, "loss": 1.3747, "step": 12284 }, { "epoch": 0.4399520117463785, "grad_norm": 1.4381917715072632, "learning_rate": 0.00012406711980710676, "loss": 1.5223, "step": 12285 }, { "epoch": 0.43998782387594676, "grad_norm": 1.4474295377731323, "learning_rate": 0.00012405586156551753, "loss": 1.4443, "step": 12286 }, { "epoch": 0.4400236360055151, "grad_norm": 2.0564377307891846, "learning_rate": 0.00012404460300028774, "loss": 1.13, "step": 12287 }, { "epoch": 0.44005944813508335, "grad_norm": 1.5260390043258667, "learning_rate": 0.00012403334411156884, "loss": 1.5361, "step": 12288 }, { "epoch": 0.4400952602646516, "grad_norm": 1.644852638244629, "learning_rate": 0.0001240220848995123, "loss": 1.5544, "step": 12289 }, { "epoch": 0.44013107239421995, "grad_norm": 1.850511908531189, "learning_rate": 0.00012401082536426958, "loss": 1.5099, "step": 12290 }, { "epoch": 0.4401668845237882, "grad_norm": 1.6762350797653198, "learning_rate": 0.00012399956550599218, "loss": 1.6942, "step": 12291 }, { "epoch": 0.4402026966533565, "grad_norm": 1.6215894222259521, "learning_rate": 0.0001239883053248316, "loss": 1.5009, "step": 12292 }, { "epoch": 0.44023850878292475, "grad_norm": 1.8665066957473755, "learning_rate": 0.0001239770448209393, "loss": 1.6581, "step": 12293 }, { "epoch": 0.4402743209124931, "grad_norm": 2.1635780334472656, "learning_rate": 0.00012396578399446678, "loss": 1.5384, "step": 12294 }, { "epoch": 0.44031013304206135, "grad_norm": 1.7075426578521729, "learning_rate": 0.00012395452284556558, "loss": 1.7423, "step": 12295 }, { "epoch": 0.4403459451716296, "grad_norm": 1.9434218406677246, "learning_rate": 0.00012394326137438714, "loss": 1.3288, "step": 12296 }, { "epoch": 0.44038175730119794, "grad_norm": 1.5657232999801636, "learning_rate": 0.000123931999581083, "loss": 1.2778, "step": 12297 }, { "epoch": 0.4404175694307662, "grad_norm": 1.5658713579177856, "learning_rate": 0.00012392073746580472, "loss": 1.5705, "step": 12298 }, { "epoch": 0.4404533815603345, "grad_norm": 1.3626611232757568, "learning_rate": 0.00012390947502870375, "loss": 1.286, "step": 12299 }, { "epoch": 0.44048919368990275, "grad_norm": 1.8273500204086304, "learning_rate": 0.00012389821226993164, "loss": 1.304, "step": 12300 }, { "epoch": 0.44052500581947107, "grad_norm": 1.7179958820343018, "learning_rate": 0.0001238869491896399, "loss": 1.674, "step": 12301 }, { "epoch": 0.44056081794903934, "grad_norm": 1.5758401155471802, "learning_rate": 0.00012387568578798005, "loss": 1.2103, "step": 12302 }, { "epoch": 0.4405966300786076, "grad_norm": 1.8869274854660034, "learning_rate": 0.00012386442206510368, "loss": 1.684, "step": 12303 }, { "epoch": 0.44063244220817593, "grad_norm": 1.4876755475997925, "learning_rate": 0.00012385315802116226, "loss": 1.5119, "step": 12304 }, { "epoch": 0.4406682543377442, "grad_norm": 2.015285015106201, "learning_rate": 0.0001238418936563074, "loss": 1.6014, "step": 12305 }, { "epoch": 0.44070406646731247, "grad_norm": 1.7106752395629883, "learning_rate": 0.0001238306289706906, "loss": 1.6048, "step": 12306 }, { "epoch": 0.44073987859688074, "grad_norm": 1.6270339488983154, "learning_rate": 0.00012381936396446344, "loss": 1.2119, "step": 12307 }, { "epoch": 0.44077569072644907, "grad_norm": 1.604411005973816, "learning_rate": 0.00012380809863777746, "loss": 1.4839, "step": 12308 }, { "epoch": 0.44081150285601733, "grad_norm": 3.05351185798645, "learning_rate": 0.00012379683299078422, "loss": 1.8318, "step": 12309 }, { "epoch": 0.4408473149855856, "grad_norm": 1.8058035373687744, "learning_rate": 0.00012378556702363527, "loss": 1.3283, "step": 12310 }, { "epoch": 0.4408831271151539, "grad_norm": 1.5172128677368164, "learning_rate": 0.00012377430073648218, "loss": 1.4016, "step": 12311 }, { "epoch": 0.4409189392447222, "grad_norm": 2.027984142303467, "learning_rate": 0.0001237630341294766, "loss": 1.5973, "step": 12312 }, { "epoch": 0.44095475137429047, "grad_norm": 1.950453281402588, "learning_rate": 0.00012375176720277002, "loss": 1.521, "step": 12313 }, { "epoch": 0.44099056350385873, "grad_norm": 2.8115463256835938, "learning_rate": 0.00012374049995651405, "loss": 1.5522, "step": 12314 }, { "epoch": 0.44102637563342706, "grad_norm": 1.677557110786438, "learning_rate": 0.00012372923239086024, "loss": 1.4244, "step": 12315 }, { "epoch": 0.44106218776299533, "grad_norm": 1.6209502220153809, "learning_rate": 0.00012371796450596028, "loss": 1.2205, "step": 12316 }, { "epoch": 0.4410979998925636, "grad_norm": 1.4655624628067017, "learning_rate": 0.00012370669630196567, "loss": 1.627, "step": 12317 }, { "epoch": 0.4411338120221319, "grad_norm": 2.140920639038086, "learning_rate": 0.00012369542777902805, "loss": 1.5118, "step": 12318 }, { "epoch": 0.4411696241517002, "grad_norm": 1.743704915046692, "learning_rate": 0.00012368415893729902, "loss": 1.6768, "step": 12319 }, { "epoch": 0.44120543628126846, "grad_norm": 1.668463945388794, "learning_rate": 0.00012367288977693016, "loss": 1.5263, "step": 12320 }, { "epoch": 0.44124124841083673, "grad_norm": 1.3567086458206177, "learning_rate": 0.0001236616202980731, "loss": 1.3551, "step": 12321 }, { "epoch": 0.44127706054040505, "grad_norm": 1.6387543678283691, "learning_rate": 0.0001236503505008795, "loss": 1.2526, "step": 12322 }, { "epoch": 0.4413128726699733, "grad_norm": 1.8538243770599365, "learning_rate": 0.0001236390803855009, "loss": 1.745, "step": 12323 }, { "epoch": 0.4413486847995416, "grad_norm": 1.6371301412582397, "learning_rate": 0.00012362780995208895, "loss": 1.5907, "step": 12324 }, { "epoch": 0.4413844969291099, "grad_norm": 1.6978520154953003, "learning_rate": 0.00012361653920079534, "loss": 1.6795, "step": 12325 }, { "epoch": 0.4414203090586782, "grad_norm": 1.94414484500885, "learning_rate": 0.00012360526813177163, "loss": 1.388, "step": 12326 }, { "epoch": 0.44145612118824645, "grad_norm": 1.762611746788025, "learning_rate": 0.00012359399674516955, "loss": 1.4216, "step": 12327 }, { "epoch": 0.4414919333178147, "grad_norm": 1.6278012990951538, "learning_rate": 0.00012358272504114058, "loss": 1.5087, "step": 12328 }, { "epoch": 0.44152774544738305, "grad_norm": 1.7553491592407227, "learning_rate": 0.00012357145301983651, "loss": 1.6467, "step": 12329 }, { "epoch": 0.4415635575769513, "grad_norm": 1.4429503679275513, "learning_rate": 0.00012356018068140895, "loss": 1.6824, "step": 12330 }, { "epoch": 0.4415993697065196, "grad_norm": 1.9834673404693604, "learning_rate": 0.00012354890802600957, "loss": 1.6054, "step": 12331 }, { "epoch": 0.4416351818360879, "grad_norm": 1.7594319581985474, "learning_rate": 0.00012353763505378997, "loss": 1.431, "step": 12332 }, { "epoch": 0.4416709939656562, "grad_norm": 2.2970147132873535, "learning_rate": 0.00012352636176490186, "loss": 2.0729, "step": 12333 }, { "epoch": 0.44170680609522445, "grad_norm": 1.2946401834487915, "learning_rate": 0.00012351508815949691, "loss": 1.4574, "step": 12334 }, { "epoch": 0.4417426182247927, "grad_norm": 1.8043230772018433, "learning_rate": 0.00012350381423772676, "loss": 1.379, "step": 12335 }, { "epoch": 0.44177843035436104, "grad_norm": 1.3034635782241821, "learning_rate": 0.00012349253999974314, "loss": 1.0734, "step": 12336 }, { "epoch": 0.4418142424839293, "grad_norm": 1.2619051933288574, "learning_rate": 0.00012348126544569767, "loss": 1.4902, "step": 12337 }, { "epoch": 0.4418500546134976, "grad_norm": 1.3467988967895508, "learning_rate": 0.00012346999057574209, "loss": 1.6634, "step": 12338 }, { "epoch": 0.4418858667430659, "grad_norm": 1.8174192905426025, "learning_rate": 0.00012345871539002801, "loss": 1.5092, "step": 12339 }, { "epoch": 0.44192167887263417, "grad_norm": 1.8187414407730103, "learning_rate": 0.00012344743988870722, "loss": 1.4353, "step": 12340 }, { "epoch": 0.44195749100220244, "grad_norm": 1.7980650663375854, "learning_rate": 0.00012343616407193135, "loss": 1.5176, "step": 12341 }, { "epoch": 0.4419933031317707, "grad_norm": 1.7426297664642334, "learning_rate": 0.00012342488793985214, "loss": 1.47, "step": 12342 }, { "epoch": 0.44202911526133903, "grad_norm": 1.9516719579696655, "learning_rate": 0.00012341361149262125, "loss": 1.3465, "step": 12343 }, { "epoch": 0.4420649273909073, "grad_norm": 2.266122817993164, "learning_rate": 0.00012340233473039045, "loss": 1.2277, "step": 12344 }, { "epoch": 0.44210073952047557, "grad_norm": 2.047231674194336, "learning_rate": 0.00012339105765331142, "loss": 1.1451, "step": 12345 }, { "epoch": 0.4421365516500439, "grad_norm": 2.4801621437072754, "learning_rate": 0.00012337978026153587, "loss": 1.3022, "step": 12346 }, { "epoch": 0.44217236377961217, "grad_norm": 1.6295838356018066, "learning_rate": 0.00012336850255521554, "loss": 1.4763, "step": 12347 }, { "epoch": 0.44220817590918043, "grad_norm": 1.4583467245101929, "learning_rate": 0.00012335722453450215, "loss": 1.5955, "step": 12348 }, { "epoch": 0.4422439880387487, "grad_norm": 1.6887112855911255, "learning_rate": 0.00012334594619954742, "loss": 1.3007, "step": 12349 }, { "epoch": 0.442279800168317, "grad_norm": 2.0861968994140625, "learning_rate": 0.0001233346675505031, "loss": 1.613, "step": 12350 }, { "epoch": 0.4423156122978853, "grad_norm": 2.0231857299804688, "learning_rate": 0.00012332338858752094, "loss": 1.5615, "step": 12351 }, { "epoch": 0.44235142442745357, "grad_norm": 2.4194977283477783, "learning_rate": 0.0001233121093107527, "loss": 1.5146, "step": 12352 }, { "epoch": 0.4423872365570219, "grad_norm": 2.190960645675659, "learning_rate": 0.00012330082972035006, "loss": 1.3055, "step": 12353 }, { "epoch": 0.44242304868659016, "grad_norm": 2.3201444149017334, "learning_rate": 0.00012328954981646482, "loss": 1.7702, "step": 12354 }, { "epoch": 0.44245886081615843, "grad_norm": 1.6395111083984375, "learning_rate": 0.0001232782695992487, "loss": 1.3856, "step": 12355 }, { "epoch": 0.4424946729457267, "grad_norm": 2.251711845397949, "learning_rate": 0.00012326698906885353, "loss": 1.8577, "step": 12356 }, { "epoch": 0.442530485075295, "grad_norm": 1.6219013929367065, "learning_rate": 0.00012325570822543103, "loss": 1.3516, "step": 12357 }, { "epoch": 0.4425662972048633, "grad_norm": 1.8324930667877197, "learning_rate": 0.00012324442706913296, "loss": 1.3791, "step": 12358 }, { "epoch": 0.44260210933443156, "grad_norm": 2.259781837463379, "learning_rate": 0.0001232331456001111, "loss": 1.6316, "step": 12359 }, { "epoch": 0.4426379214639999, "grad_norm": 2.230318307876587, "learning_rate": 0.00012322186381851725, "loss": 1.7068, "step": 12360 }, { "epoch": 0.44267373359356815, "grad_norm": 1.9674252271652222, "learning_rate": 0.00012321058172450318, "loss": 1.7323, "step": 12361 }, { "epoch": 0.4427095457231364, "grad_norm": 1.609484314918518, "learning_rate": 0.0001231992993182207, "loss": 1.5709, "step": 12362 }, { "epoch": 0.4427453578527047, "grad_norm": 1.5630927085876465, "learning_rate": 0.00012318801659982152, "loss": 1.605, "step": 12363 }, { "epoch": 0.442781169982273, "grad_norm": 1.5356788635253906, "learning_rate": 0.00012317673356945753, "loss": 1.161, "step": 12364 }, { "epoch": 0.4428169821118413, "grad_norm": 2.1178667545318604, "learning_rate": 0.00012316545022728043, "loss": 1.4586, "step": 12365 }, { "epoch": 0.44285279424140955, "grad_norm": 3.0236313343048096, "learning_rate": 0.00012315416657344213, "loss": 1.5712, "step": 12366 }, { "epoch": 0.4428886063709779, "grad_norm": 2.9537036418914795, "learning_rate": 0.00012314288260809435, "loss": 1.5299, "step": 12367 }, { "epoch": 0.44292441850054615, "grad_norm": 1.7508946657180786, "learning_rate": 0.00012313159833138892, "loss": 1.7762, "step": 12368 }, { "epoch": 0.4429602306301144, "grad_norm": 1.5886273384094238, "learning_rate": 0.00012312031374347773, "loss": 1.4263, "step": 12369 }, { "epoch": 0.4429960427596827, "grad_norm": 1.5932828187942505, "learning_rate": 0.00012310902884451252, "loss": 1.6544, "step": 12370 }, { "epoch": 0.443031854889251, "grad_norm": 1.7331477403640747, "learning_rate": 0.00012309774363464514, "loss": 1.6996, "step": 12371 }, { "epoch": 0.4430676670188193, "grad_norm": 1.5816339254379272, "learning_rate": 0.00012308645811402738, "loss": 1.469, "step": 12372 }, { "epoch": 0.44310347914838755, "grad_norm": 2.0275886058807373, "learning_rate": 0.00012307517228281117, "loss": 1.8279, "step": 12373 }, { "epoch": 0.44313929127795587, "grad_norm": 1.7863155603408813, "learning_rate": 0.00012306388614114822, "loss": 1.543, "step": 12374 }, { "epoch": 0.44317510340752414, "grad_norm": 1.4801067113876343, "learning_rate": 0.00012305259968919046, "loss": 1.5689, "step": 12375 }, { "epoch": 0.4432109155370924, "grad_norm": 1.4803080558776855, "learning_rate": 0.00012304131292708968, "loss": 1.3355, "step": 12376 }, { "epoch": 0.4432467276666607, "grad_norm": 1.7210575342178345, "learning_rate": 0.0001230300258549978, "loss": 1.602, "step": 12377 }, { "epoch": 0.443282539796229, "grad_norm": 1.4873367547988892, "learning_rate": 0.00012301873847306657, "loss": 1.3953, "step": 12378 }, { "epoch": 0.44331835192579727, "grad_norm": 1.6360687017440796, "learning_rate": 0.00012300745078144796, "loss": 1.4753, "step": 12379 }, { "epoch": 0.44335416405536554, "grad_norm": 1.5548124313354492, "learning_rate": 0.00012299616278029375, "loss": 1.3875, "step": 12380 }, { "epoch": 0.4433899761849338, "grad_norm": 1.4438000917434692, "learning_rate": 0.00012298487446975583, "loss": 1.5891, "step": 12381 }, { "epoch": 0.44342578831450213, "grad_norm": 2.041623830795288, "learning_rate": 0.0001229735858499861, "loss": 1.3645, "step": 12382 }, { "epoch": 0.4434616004440704, "grad_norm": 1.9327969551086426, "learning_rate": 0.0001229622969211364, "loss": 1.5932, "step": 12383 }, { "epoch": 0.44349741257363867, "grad_norm": 1.3270928859710693, "learning_rate": 0.00012295100768335858, "loss": 1.7006, "step": 12384 }, { "epoch": 0.443533224703207, "grad_norm": 1.8910813331604004, "learning_rate": 0.00012293971813680458, "loss": 1.6462, "step": 12385 }, { "epoch": 0.44356903683277527, "grad_norm": 2.8436388969421387, "learning_rate": 0.00012292842828162627, "loss": 1.5743, "step": 12386 }, { "epoch": 0.44360484896234353, "grad_norm": 1.7232835292816162, "learning_rate": 0.00012291713811797553, "loss": 1.4634, "step": 12387 }, { "epoch": 0.4436406610919118, "grad_norm": 1.5384464263916016, "learning_rate": 0.00012290584764600425, "loss": 1.4929, "step": 12388 }, { "epoch": 0.4436764732214801, "grad_norm": 1.418431043624878, "learning_rate": 0.00012289455686586434, "loss": 1.4444, "step": 12389 }, { "epoch": 0.4437122853510484, "grad_norm": 1.4715633392333984, "learning_rate": 0.0001228832657777077, "loss": 1.5561, "step": 12390 }, { "epoch": 0.44374809748061667, "grad_norm": 1.7735953330993652, "learning_rate": 0.00012287197438168624, "loss": 1.9554, "step": 12391 }, { "epoch": 0.443783909610185, "grad_norm": 1.6121642589569092, "learning_rate": 0.00012286068267795185, "loss": 1.3333, "step": 12392 }, { "epoch": 0.44381972173975326, "grad_norm": 1.414981722831726, "learning_rate": 0.00012284939066665648, "loss": 1.5743, "step": 12393 }, { "epoch": 0.44385553386932153, "grad_norm": 1.645015001296997, "learning_rate": 0.00012283809834795202, "loss": 1.6721, "step": 12394 }, { "epoch": 0.4438913459988898, "grad_norm": 1.310796856880188, "learning_rate": 0.00012282680572199043, "loss": 1.4809, "step": 12395 }, { "epoch": 0.4439271581284581, "grad_norm": 1.5144050121307373, "learning_rate": 0.00012281551278892357, "loss": 1.1173, "step": 12396 }, { "epoch": 0.4439629702580264, "grad_norm": 1.5878082513809204, "learning_rate": 0.00012280421954890346, "loss": 1.4621, "step": 12397 }, { "epoch": 0.44399878238759466, "grad_norm": 2.0923068523406982, "learning_rate": 0.000122792926002082, "loss": 1.4257, "step": 12398 }, { "epoch": 0.444034594517163, "grad_norm": 1.6810699701309204, "learning_rate": 0.00012278163214861107, "loss": 1.3412, "step": 12399 }, { "epoch": 0.44407040664673125, "grad_norm": 1.7527968883514404, "learning_rate": 0.00012277033798864268, "loss": 1.673, "step": 12400 }, { "epoch": 0.4441062187762995, "grad_norm": 2.4101831912994385, "learning_rate": 0.00012275904352232876, "loss": 1.4516, "step": 12401 }, { "epoch": 0.4441420309058678, "grad_norm": 1.7955946922302246, "learning_rate": 0.00012274774874982132, "loss": 1.8107, "step": 12402 }, { "epoch": 0.4441778430354361, "grad_norm": 1.6984844207763672, "learning_rate": 0.0001227364536712722, "loss": 1.5125, "step": 12403 }, { "epoch": 0.4442136551650044, "grad_norm": 1.418100357055664, "learning_rate": 0.00012272515828683344, "loss": 1.487, "step": 12404 }, { "epoch": 0.44424946729457265, "grad_norm": 1.629875898361206, "learning_rate": 0.00012271386259665701, "loss": 1.4305, "step": 12405 }, { "epoch": 0.444285279424141, "grad_norm": 2.2954277992248535, "learning_rate": 0.00012270256660089484, "loss": 1.7927, "step": 12406 }, { "epoch": 0.44432109155370925, "grad_norm": 1.8321059942245483, "learning_rate": 0.00012269127029969893, "loss": 1.5456, "step": 12407 }, { "epoch": 0.4443569036832775, "grad_norm": 1.7216614484786987, "learning_rate": 0.00012267997369322126, "loss": 1.5889, "step": 12408 }, { "epoch": 0.4443927158128458, "grad_norm": 1.7237794399261475, "learning_rate": 0.00012266867678161375, "loss": 1.3024, "step": 12409 }, { "epoch": 0.4444285279424141, "grad_norm": 1.995652437210083, "learning_rate": 0.00012265737956502847, "loss": 1.3176, "step": 12410 }, { "epoch": 0.4444643400719824, "grad_norm": 1.5316146612167358, "learning_rate": 0.0001226460820436174, "loss": 1.627, "step": 12411 }, { "epoch": 0.44450015220155065, "grad_norm": 1.9641482830047607, "learning_rate": 0.00012263478421753243, "loss": 1.3494, "step": 12412 }, { "epoch": 0.44453596433111897, "grad_norm": 1.9962435960769653, "learning_rate": 0.0001226234860869257, "loss": 1.3349, "step": 12413 }, { "epoch": 0.44457177646068724, "grad_norm": 1.4365565776824951, "learning_rate": 0.00012261218765194913, "loss": 1.4867, "step": 12414 }, { "epoch": 0.4446075885902555, "grad_norm": 1.7713817358016968, "learning_rate": 0.00012260088891275476, "loss": 1.5989, "step": 12415 }, { "epoch": 0.4446434007198238, "grad_norm": 1.790130376815796, "learning_rate": 0.00012258958986949455, "loss": 1.5266, "step": 12416 }, { "epoch": 0.4446792128493921, "grad_norm": 1.350547432899475, "learning_rate": 0.00012257829052232056, "loss": 1.8426, "step": 12417 }, { "epoch": 0.44471502497896037, "grad_norm": 1.452842116355896, "learning_rate": 0.00012256699087138479, "loss": 1.4189, "step": 12418 }, { "epoch": 0.44475083710852864, "grad_norm": 2.3444418907165527, "learning_rate": 0.0001225556909168393, "loss": 1.565, "step": 12419 }, { "epoch": 0.44478664923809696, "grad_norm": 2.152139902114868, "learning_rate": 0.00012254439065883602, "loss": 1.4686, "step": 12420 }, { "epoch": 0.44482246136766523, "grad_norm": 1.6714543104171753, "learning_rate": 0.0001225330900975271, "loss": 1.4201, "step": 12421 }, { "epoch": 0.4448582734972335, "grad_norm": 1.691991925239563, "learning_rate": 0.00012252178923306448, "loss": 1.2425, "step": 12422 }, { "epoch": 0.44489408562680177, "grad_norm": 1.3791453838348389, "learning_rate": 0.00012251048806560027, "loss": 1.2884, "step": 12423 }, { "epoch": 0.4449298977563701, "grad_norm": 1.6652225255966187, "learning_rate": 0.00012249918659528648, "loss": 1.4209, "step": 12424 }, { "epoch": 0.44496570988593837, "grad_norm": 2.57869291305542, "learning_rate": 0.0001224878848222751, "loss": 1.6552, "step": 12425 }, { "epoch": 0.44500152201550663, "grad_norm": 1.6043140888214111, "learning_rate": 0.0001224765827467183, "loss": 1.2117, "step": 12426 }, { "epoch": 0.44503733414507496, "grad_norm": 1.4723271131515503, "learning_rate": 0.00012246528036876807, "loss": 1.5539, "step": 12427 }, { "epoch": 0.4450731462746432, "grad_norm": 2.1970090866088867, "learning_rate": 0.00012245397768857646, "loss": 1.4757, "step": 12428 }, { "epoch": 0.4451089584042115, "grad_norm": 1.648956537246704, "learning_rate": 0.0001224426747062955, "loss": 1.4227, "step": 12429 }, { "epoch": 0.44514477053377977, "grad_norm": 1.9277909994125366, "learning_rate": 0.00012243137142207733, "loss": 1.6855, "step": 12430 }, { "epoch": 0.4451805826633481, "grad_norm": 1.7373301982879639, "learning_rate": 0.000122420067836074, "loss": 1.3754, "step": 12431 }, { "epoch": 0.44521639479291636, "grad_norm": 1.5616296529769897, "learning_rate": 0.0001224087639484376, "loss": 1.539, "step": 12432 }, { "epoch": 0.44525220692248463, "grad_norm": 2.611417055130005, "learning_rate": 0.00012239745975932016, "loss": 1.4478, "step": 12433 }, { "epoch": 0.44528801905205295, "grad_norm": 3.34085750579834, "learning_rate": 0.00012238615526887378, "loss": 2.1207, "step": 12434 }, { "epoch": 0.4453238311816212, "grad_norm": 1.9090518951416016, "learning_rate": 0.00012237485047725057, "loss": 1.6396, "step": 12435 }, { "epoch": 0.4453596433111895, "grad_norm": 1.521642804145813, "learning_rate": 0.00012236354538460259, "loss": 1.5772, "step": 12436 }, { "epoch": 0.44539545544075776, "grad_norm": 2.2825441360473633, "learning_rate": 0.000122352239991082, "loss": 1.5948, "step": 12437 }, { "epoch": 0.4454312675703261, "grad_norm": 1.408004641532898, "learning_rate": 0.0001223409342968408, "loss": 1.244, "step": 12438 }, { "epoch": 0.44546707969989435, "grad_norm": 1.4252254962921143, "learning_rate": 0.00012232962830203116, "loss": 1.5186, "step": 12439 }, { "epoch": 0.4455028918294626, "grad_norm": 2.0045323371887207, "learning_rate": 0.00012231832200680518, "loss": 1.4156, "step": 12440 }, { "epoch": 0.44553870395903095, "grad_norm": 1.9673701524734497, "learning_rate": 0.00012230701541131499, "loss": 1.2987, "step": 12441 }, { "epoch": 0.4455745160885992, "grad_norm": 1.8658106327056885, "learning_rate": 0.00012229570851571265, "loss": 1.5869, "step": 12442 }, { "epoch": 0.4456103282181675, "grad_norm": 2.748161554336548, "learning_rate": 0.00012228440132015033, "loss": 1.375, "step": 12443 }, { "epoch": 0.44564614034773575, "grad_norm": 2.1242969036102295, "learning_rate": 0.0001222730938247801, "loss": 1.8089, "step": 12444 }, { "epoch": 0.4456819524773041, "grad_norm": 1.5516866445541382, "learning_rate": 0.00012226178602975417, "loss": 1.4706, "step": 12445 }, { "epoch": 0.44571776460687235, "grad_norm": 2.02280855178833, "learning_rate": 0.00012225047793522462, "loss": 1.583, "step": 12446 }, { "epoch": 0.4457535767364406, "grad_norm": 1.7258055210113525, "learning_rate": 0.00012223916954134356, "loss": 1.6239, "step": 12447 }, { "epoch": 0.44578938886600894, "grad_norm": 1.9967955350875854, "learning_rate": 0.00012222786084826318, "loss": 1.5723, "step": 12448 }, { "epoch": 0.4458252009955772, "grad_norm": 1.7219719886779785, "learning_rate": 0.00012221655185613557, "loss": 1.376, "step": 12449 }, { "epoch": 0.4458610131251455, "grad_norm": 1.6591328382492065, "learning_rate": 0.00012220524256511297, "loss": 1.5033, "step": 12450 }, { "epoch": 0.44589682525471375, "grad_norm": 1.6007272005081177, "learning_rate": 0.00012219393297534744, "loss": 1.0919, "step": 12451 }, { "epoch": 0.44593263738428207, "grad_norm": 1.706794261932373, "learning_rate": 0.00012218262308699119, "loss": 1.319, "step": 12452 }, { "epoch": 0.44596844951385034, "grad_norm": 1.502685785293579, "learning_rate": 0.00012217131290019633, "loss": 1.5424, "step": 12453 }, { "epoch": 0.4460042616434186, "grad_norm": 1.5585651397705078, "learning_rate": 0.00012216000241511507, "loss": 1.5909, "step": 12454 }, { "epoch": 0.44604007377298693, "grad_norm": 3.2422244548797607, "learning_rate": 0.00012214869163189958, "loss": 1.6386, "step": 12455 }, { "epoch": 0.4460758859025552, "grad_norm": 1.7532734870910645, "learning_rate": 0.00012213738055070195, "loss": 1.59, "step": 12456 }, { "epoch": 0.44611169803212347, "grad_norm": 1.761208176612854, "learning_rate": 0.0001221260691716745, "loss": 1.3127, "step": 12457 }, { "epoch": 0.44614751016169174, "grad_norm": 1.8905987739562988, "learning_rate": 0.0001221147574949693, "loss": 1.4267, "step": 12458 }, { "epoch": 0.44618332229126006, "grad_norm": 1.5984662771224976, "learning_rate": 0.00012210344552073855, "loss": 1.8281, "step": 12459 }, { "epoch": 0.44621913442082833, "grad_norm": 1.3725858926773071, "learning_rate": 0.00012209213324913446, "loss": 1.4297, "step": 12460 }, { "epoch": 0.4462549465503966, "grad_norm": 2.176337242126465, "learning_rate": 0.00012208082068030924, "loss": 1.7024, "step": 12461 }, { "epoch": 0.4462907586799649, "grad_norm": 1.6479774713516235, "learning_rate": 0.00012206950781441502, "loss": 1.2077, "step": 12462 }, { "epoch": 0.4463265708095332, "grad_norm": 1.9320515394210815, "learning_rate": 0.00012205819465160407, "loss": 1.8261, "step": 12463 }, { "epoch": 0.44636238293910147, "grad_norm": 1.9457646608352661, "learning_rate": 0.00012204688119202852, "loss": 1.6547, "step": 12464 }, { "epoch": 0.44639819506866973, "grad_norm": 1.4203673601150513, "learning_rate": 0.00012203556743584063, "loss": 1.449, "step": 12465 }, { "epoch": 0.44643400719823806, "grad_norm": 1.9975597858428955, "learning_rate": 0.00012202425338319265, "loss": 1.7199, "step": 12466 }, { "epoch": 0.4464698193278063, "grad_norm": 1.3389594554901123, "learning_rate": 0.00012201293903423675, "loss": 1.3139, "step": 12467 }, { "epoch": 0.4465056314573746, "grad_norm": 1.865659475326538, "learning_rate": 0.00012200162438912512, "loss": 1.7206, "step": 12468 }, { "epoch": 0.4465414435869429, "grad_norm": 2.4966471195220947, "learning_rate": 0.00012199030944801, "loss": 1.8555, "step": 12469 }, { "epoch": 0.4465772557165112, "grad_norm": 1.775084137916565, "learning_rate": 0.00012197899421104367, "loss": 1.52, "step": 12470 }, { "epoch": 0.44661306784607946, "grad_norm": 1.4114207029342651, "learning_rate": 0.00012196767867837829, "loss": 1.531, "step": 12471 }, { "epoch": 0.44664887997564773, "grad_norm": 1.6351429224014282, "learning_rate": 0.00012195636285016614, "loss": 1.5006, "step": 12472 }, { "epoch": 0.44668469210521605, "grad_norm": 1.3841909170150757, "learning_rate": 0.00012194504672655944, "loss": 1.4175, "step": 12473 }, { "epoch": 0.4467205042347843, "grad_norm": 1.8590139150619507, "learning_rate": 0.00012193373030771046, "loss": 1.4761, "step": 12474 }, { "epoch": 0.4467563163643526, "grad_norm": 1.685275673866272, "learning_rate": 0.00012192241359377143, "loss": 1.5694, "step": 12475 }, { "epoch": 0.4467921284939209, "grad_norm": 1.7317240238189697, "learning_rate": 0.00012191109658489462, "loss": 1.3867, "step": 12476 }, { "epoch": 0.4468279406234892, "grad_norm": 2.1701266765594482, "learning_rate": 0.00012189977928123224, "loss": 1.6338, "step": 12477 }, { "epoch": 0.44686375275305745, "grad_norm": 1.709526538848877, "learning_rate": 0.0001218884616829366, "loss": 1.4318, "step": 12478 }, { "epoch": 0.4468995648826257, "grad_norm": 2.59258770942688, "learning_rate": 0.00012187714379015993, "loss": 1.8929, "step": 12479 }, { "epoch": 0.44693537701219405, "grad_norm": 1.6843186616897583, "learning_rate": 0.00012186582560305448, "loss": 1.4508, "step": 12480 }, { "epoch": 0.4469711891417623, "grad_norm": 1.3819479942321777, "learning_rate": 0.0001218545071217726, "loss": 1.5629, "step": 12481 }, { "epoch": 0.4470070012713306, "grad_norm": 2.069230079650879, "learning_rate": 0.00012184318834646648, "loss": 1.745, "step": 12482 }, { "epoch": 0.4470428134008989, "grad_norm": 1.6494131088256836, "learning_rate": 0.00012183186927728846, "loss": 1.4686, "step": 12483 }, { "epoch": 0.4470786255304672, "grad_norm": 1.2454302310943604, "learning_rate": 0.00012182054991439078, "loss": 1.2643, "step": 12484 }, { "epoch": 0.44711443766003545, "grad_norm": 1.628246784210205, "learning_rate": 0.00012180923025792579, "loss": 1.6646, "step": 12485 }, { "epoch": 0.4471502497896037, "grad_norm": 1.3497425317764282, "learning_rate": 0.00012179791030804573, "loss": 1.4701, "step": 12486 }, { "epoch": 0.44718606191917204, "grad_norm": 1.5856043100357056, "learning_rate": 0.00012178659006490285, "loss": 1.625, "step": 12487 }, { "epoch": 0.4472218740487403, "grad_norm": 2.0601372718811035, "learning_rate": 0.00012177526952864955, "loss": 1.6557, "step": 12488 }, { "epoch": 0.4472576861783086, "grad_norm": 1.5500800609588623, "learning_rate": 0.00012176394869943805, "loss": 1.7003, "step": 12489 }, { "epoch": 0.4472934983078769, "grad_norm": 1.6595675945281982, "learning_rate": 0.00012175262757742074, "loss": 1.5211, "step": 12490 }, { "epoch": 0.44732931043744517, "grad_norm": 1.8388330936431885, "learning_rate": 0.00012174130616274985, "loss": 1.5596, "step": 12491 }, { "epoch": 0.44736512256701344, "grad_norm": 1.7560348510742188, "learning_rate": 0.00012172998445557775, "loss": 1.4747, "step": 12492 }, { "epoch": 0.4474009346965817, "grad_norm": 2.180821180343628, "learning_rate": 0.00012171866245605671, "loss": 1.6482, "step": 12493 }, { "epoch": 0.44743674682615003, "grad_norm": 1.6616812944412231, "learning_rate": 0.00012170734016433914, "loss": 1.4381, "step": 12494 }, { "epoch": 0.4474725589557183, "grad_norm": 1.351104736328125, "learning_rate": 0.00012169601758057727, "loss": 1.6061, "step": 12495 }, { "epoch": 0.44750837108528657, "grad_norm": 1.8594951629638672, "learning_rate": 0.00012168469470492345, "loss": 1.5047, "step": 12496 }, { "epoch": 0.4475441832148549, "grad_norm": 1.516777515411377, "learning_rate": 0.00012167337153753007, "loss": 1.9187, "step": 12497 }, { "epoch": 0.44757999534442316, "grad_norm": 1.9803366661071777, "learning_rate": 0.00012166204807854942, "loss": 1.584, "step": 12498 }, { "epoch": 0.44761580747399143, "grad_norm": 1.7764346599578857, "learning_rate": 0.00012165072432813385, "loss": 1.388, "step": 12499 }, { "epoch": 0.4476516196035597, "grad_norm": 1.7678972482681274, "learning_rate": 0.0001216394002864357, "loss": 1.3775, "step": 12500 }, { "epoch": 0.447687431733128, "grad_norm": 1.4079352617263794, "learning_rate": 0.00012162807595360737, "loss": 1.5185, "step": 12501 }, { "epoch": 0.4477232438626963, "grad_norm": 1.933819055557251, "learning_rate": 0.00012161675132980114, "loss": 1.5765, "step": 12502 }, { "epoch": 0.44775905599226457, "grad_norm": 2.9456725120544434, "learning_rate": 0.00012160542641516945, "loss": 1.8331, "step": 12503 }, { "epoch": 0.4477948681218329, "grad_norm": 1.9133472442626953, "learning_rate": 0.00012159410120986456, "loss": 1.6421, "step": 12504 }, { "epoch": 0.44783068025140116, "grad_norm": 1.5416887998580933, "learning_rate": 0.00012158277571403893, "loss": 1.3045, "step": 12505 }, { "epoch": 0.4478664923809694, "grad_norm": 2.035646677017212, "learning_rate": 0.00012157144992784486, "loss": 1.4106, "step": 12506 }, { "epoch": 0.4479023045105377, "grad_norm": 1.2931156158447266, "learning_rate": 0.00012156012385143479, "loss": 1.7832, "step": 12507 }, { "epoch": 0.447938116640106, "grad_norm": 1.6115169525146484, "learning_rate": 0.00012154879748496104, "loss": 1.4598, "step": 12508 }, { "epoch": 0.4479739287696743, "grad_norm": 2.0151591300964355, "learning_rate": 0.00012153747082857601, "loss": 1.7758, "step": 12509 }, { "epoch": 0.44800974089924256, "grad_norm": 1.969767451286316, "learning_rate": 0.00012152614388243213, "loss": 1.692, "step": 12510 }, { "epoch": 0.4480455530288109, "grad_norm": 2.38447904586792, "learning_rate": 0.00012151481664668175, "loss": 1.5807, "step": 12511 }, { "epoch": 0.44808136515837915, "grad_norm": 1.623461127281189, "learning_rate": 0.00012150348912147723, "loss": 1.3877, "step": 12512 }, { "epoch": 0.4481171772879474, "grad_norm": 1.4375425577163696, "learning_rate": 0.000121492161306971, "loss": 1.5076, "step": 12513 }, { "epoch": 0.4481529894175157, "grad_norm": 1.580809473991394, "learning_rate": 0.00012148083320331549, "loss": 1.5652, "step": 12514 }, { "epoch": 0.448188801547084, "grad_norm": 2.1061019897460938, "learning_rate": 0.00012146950481066304, "loss": 1.6577, "step": 12515 }, { "epoch": 0.4482246136766523, "grad_norm": 1.5830954313278198, "learning_rate": 0.00012145817612916612, "loss": 1.5543, "step": 12516 }, { "epoch": 0.44826042580622055, "grad_norm": 1.8224624395370483, "learning_rate": 0.00012144684715897711, "loss": 1.2341, "step": 12517 }, { "epoch": 0.4482962379357889, "grad_norm": 1.5227636098861694, "learning_rate": 0.00012143551790024848, "loss": 1.641, "step": 12518 }, { "epoch": 0.44833205006535715, "grad_norm": 1.8081822395324707, "learning_rate": 0.00012142418835313254, "loss": 1.3633, "step": 12519 }, { "epoch": 0.4483678621949254, "grad_norm": 1.5999212265014648, "learning_rate": 0.00012141285851778183, "loss": 1.444, "step": 12520 }, { "epoch": 0.4484036743244937, "grad_norm": 2.473132371902466, "learning_rate": 0.00012140152839434873, "loss": 1.7155, "step": 12521 }, { "epoch": 0.448439486454062, "grad_norm": 2.2659380435943604, "learning_rate": 0.00012139019798298563, "loss": 1.7137, "step": 12522 }, { "epoch": 0.4484752985836303, "grad_norm": 2.1838572025299072, "learning_rate": 0.00012137886728384504, "loss": 1.5565, "step": 12523 }, { "epoch": 0.44851111071319855, "grad_norm": 1.3856747150421143, "learning_rate": 0.00012136753629707936, "loss": 1.5583, "step": 12524 }, { "epoch": 0.44854692284276687, "grad_norm": 1.9057285785675049, "learning_rate": 0.00012135620502284104, "loss": 1.5314, "step": 12525 }, { "epoch": 0.44858273497233514, "grad_norm": 1.7440130710601807, "learning_rate": 0.00012134487346128252, "loss": 1.6575, "step": 12526 }, { "epoch": 0.4486185471019034, "grad_norm": 2.055412769317627, "learning_rate": 0.00012133354161255628, "loss": 1.4412, "step": 12527 }, { "epoch": 0.4486543592314717, "grad_norm": 1.5365710258483887, "learning_rate": 0.00012132220947681472, "loss": 1.2439, "step": 12528 }, { "epoch": 0.44869017136104, "grad_norm": 1.7394073009490967, "learning_rate": 0.00012131087705421036, "loss": 1.5784, "step": 12529 }, { "epoch": 0.44872598349060827, "grad_norm": 1.6387853622436523, "learning_rate": 0.00012129954434489566, "loss": 1.4735, "step": 12530 }, { "epoch": 0.44876179562017654, "grad_norm": 1.3802781105041504, "learning_rate": 0.00012128821134902302, "loss": 1.4855, "step": 12531 }, { "epoch": 0.44879760774974486, "grad_norm": 2.449227809906006, "learning_rate": 0.00012127687806674499, "loss": 1.7276, "step": 12532 }, { "epoch": 0.44883341987931313, "grad_norm": 1.6007570028305054, "learning_rate": 0.00012126554449821399, "loss": 1.549, "step": 12533 }, { "epoch": 0.4488692320088814, "grad_norm": 1.3325066566467285, "learning_rate": 0.00012125421064358253, "loss": 1.3366, "step": 12534 }, { "epoch": 0.44890504413844967, "grad_norm": 1.4373480081558228, "learning_rate": 0.00012124287650300307, "loss": 1.6836, "step": 12535 }, { "epoch": 0.448940856268018, "grad_norm": 2.2037734985351562, "learning_rate": 0.00012123154207662815, "loss": 1.4235, "step": 12536 }, { "epoch": 0.44897666839758626, "grad_norm": 1.5827618837356567, "learning_rate": 0.00012122020736461018, "loss": 1.5252, "step": 12537 }, { "epoch": 0.44901248052715453, "grad_norm": 1.817306399345398, "learning_rate": 0.0001212088723671017, "loss": 1.6918, "step": 12538 }, { "epoch": 0.44904829265672286, "grad_norm": 1.913685917854309, "learning_rate": 0.0001211975370842552, "loss": 1.6136, "step": 12539 }, { "epoch": 0.4490841047862911, "grad_norm": 2.0694994926452637, "learning_rate": 0.00012118620151622317, "loss": 1.3688, "step": 12540 }, { "epoch": 0.4491199169158594, "grad_norm": 1.696552038192749, "learning_rate": 0.00012117486566315814, "loss": 1.4033, "step": 12541 }, { "epoch": 0.44915572904542767, "grad_norm": 1.6482453346252441, "learning_rate": 0.0001211635295252126, "loss": 1.4401, "step": 12542 }, { "epoch": 0.449191541174996, "grad_norm": 1.2823840379714966, "learning_rate": 0.0001211521931025391, "loss": 1.4209, "step": 12543 }, { "epoch": 0.44922735330456426, "grad_norm": 1.704340934753418, "learning_rate": 0.00012114085639529007, "loss": 1.5546, "step": 12544 }, { "epoch": 0.4492631654341325, "grad_norm": 1.6278151273727417, "learning_rate": 0.00012112951940361812, "loss": 1.6255, "step": 12545 }, { "epoch": 0.44929897756370085, "grad_norm": 2.9371583461761475, "learning_rate": 0.00012111818212767572, "loss": 1.8694, "step": 12546 }, { "epoch": 0.4493347896932691, "grad_norm": 1.6199440956115723, "learning_rate": 0.00012110684456761547, "loss": 1.1853, "step": 12547 }, { "epoch": 0.4493706018228374, "grad_norm": 1.508470892906189, "learning_rate": 0.0001210955067235898, "loss": 1.4939, "step": 12548 }, { "epoch": 0.44940641395240566, "grad_norm": 1.4398698806762695, "learning_rate": 0.00012108416859575131, "loss": 1.5191, "step": 12549 }, { "epoch": 0.449442226081974, "grad_norm": 1.856343150138855, "learning_rate": 0.00012107283018425256, "loss": 1.8702, "step": 12550 }, { "epoch": 0.44947803821154225, "grad_norm": 1.3848611116409302, "learning_rate": 0.00012106149148924602, "loss": 1.6158, "step": 12551 }, { "epoch": 0.4495138503411105, "grad_norm": 1.683535099029541, "learning_rate": 0.0001210501525108843, "loss": 1.8212, "step": 12552 }, { "epoch": 0.44954966247067885, "grad_norm": 1.8491222858428955, "learning_rate": 0.00012103881324931991, "loss": 1.3237, "step": 12553 }, { "epoch": 0.4495854746002471, "grad_norm": 1.5038716793060303, "learning_rate": 0.00012102747370470546, "loss": 1.2653, "step": 12554 }, { "epoch": 0.4496212867298154, "grad_norm": 1.5144926309585571, "learning_rate": 0.00012101613387719348, "loss": 1.6953, "step": 12555 }, { "epoch": 0.44965709885938365, "grad_norm": 2.2025644779205322, "learning_rate": 0.00012100479376693652, "loss": 1.5872, "step": 12556 }, { "epoch": 0.449692910988952, "grad_norm": 1.7867844104766846, "learning_rate": 0.00012099345337408712, "loss": 1.371, "step": 12557 }, { "epoch": 0.44972872311852025, "grad_norm": 1.6688547134399414, "learning_rate": 0.00012098211269879791, "loss": 1.3092, "step": 12558 }, { "epoch": 0.4497645352480885, "grad_norm": 1.5913275480270386, "learning_rate": 0.00012097077174122143, "loss": 1.6201, "step": 12559 }, { "epoch": 0.44980034737765684, "grad_norm": 1.4191871881484985, "learning_rate": 0.00012095943050151026, "loss": 1.5449, "step": 12560 }, { "epoch": 0.4498361595072251, "grad_norm": 1.5668357610702515, "learning_rate": 0.000120948088979817, "loss": 1.2965, "step": 12561 }, { "epoch": 0.4498719716367934, "grad_norm": 1.4503635168075562, "learning_rate": 0.00012093674717629419, "loss": 1.5289, "step": 12562 }, { "epoch": 0.44990778376636165, "grad_norm": 1.6835050582885742, "learning_rate": 0.00012092540509109451, "loss": 1.5644, "step": 12563 }, { "epoch": 0.44994359589592997, "grad_norm": 1.895340919494629, "learning_rate": 0.00012091406272437049, "loss": 1.3566, "step": 12564 }, { "epoch": 0.44997940802549824, "grad_norm": 1.6163996458053589, "learning_rate": 0.00012090272007627472, "loss": 1.5901, "step": 12565 }, { "epoch": 0.4500152201550665, "grad_norm": 1.5837438106536865, "learning_rate": 0.0001208913771469598, "loss": 1.3175, "step": 12566 }, { "epoch": 0.45005103228463483, "grad_norm": 1.9061717987060547, "learning_rate": 0.00012088003393657837, "loss": 1.4182, "step": 12567 }, { "epoch": 0.4500868444142031, "grad_norm": 1.5280214548110962, "learning_rate": 0.00012086869044528297, "loss": 1.5659, "step": 12568 }, { "epoch": 0.45012265654377137, "grad_norm": 2.357043743133545, "learning_rate": 0.0001208573466732263, "loss": 1.5281, "step": 12569 }, { "epoch": 0.45015846867333964, "grad_norm": 1.4980483055114746, "learning_rate": 0.00012084600262056094, "loss": 1.6226, "step": 12570 }, { "epoch": 0.45019428080290796, "grad_norm": 1.7290112972259521, "learning_rate": 0.0001208346582874395, "loss": 1.5311, "step": 12571 }, { "epoch": 0.45023009293247623, "grad_norm": 1.7056915760040283, "learning_rate": 0.00012082331367401458, "loss": 1.532, "step": 12572 }, { "epoch": 0.4502659050620445, "grad_norm": 2.0714375972747803, "learning_rate": 0.00012081196878043885, "loss": 1.2363, "step": 12573 }, { "epoch": 0.4503017171916128, "grad_norm": 1.90829598903656, "learning_rate": 0.00012080062360686495, "loss": 1.5991, "step": 12574 }, { "epoch": 0.4503375293211811, "grad_norm": 1.3349385261535645, "learning_rate": 0.00012078927815344545, "loss": 1.2521, "step": 12575 }, { "epoch": 0.45037334145074936, "grad_norm": 1.827406883239746, "learning_rate": 0.00012077793242033307, "loss": 1.6133, "step": 12576 }, { "epoch": 0.45040915358031763, "grad_norm": 1.918264627456665, "learning_rate": 0.00012076658640768036, "loss": 1.5396, "step": 12577 }, { "epoch": 0.45044496570988596, "grad_norm": 1.7360564470291138, "learning_rate": 0.00012075524011564005, "loss": 1.3884, "step": 12578 }, { "epoch": 0.4504807778394542, "grad_norm": 1.4378196001052856, "learning_rate": 0.00012074389354436475, "loss": 1.5432, "step": 12579 }, { "epoch": 0.4505165899690225, "grad_norm": 1.7006964683532715, "learning_rate": 0.00012073254669400713, "loss": 1.5753, "step": 12580 }, { "epoch": 0.45055240209859077, "grad_norm": 1.619866132736206, "learning_rate": 0.00012072119956471981, "loss": 1.5881, "step": 12581 }, { "epoch": 0.4505882142281591, "grad_norm": 1.2958283424377441, "learning_rate": 0.00012070985215665551, "loss": 1.6152, "step": 12582 }, { "epoch": 0.45062402635772736, "grad_norm": 2.0520436763763428, "learning_rate": 0.00012069850446996686, "loss": 1.7036, "step": 12583 }, { "epoch": 0.4506598384872956, "grad_norm": 1.5955454111099243, "learning_rate": 0.00012068715650480653, "loss": 1.4784, "step": 12584 }, { "epoch": 0.45069565061686395, "grad_norm": 2.244309425354004, "learning_rate": 0.00012067580826132718, "loss": 2.0482, "step": 12585 }, { "epoch": 0.4507314627464322, "grad_norm": 1.449869155883789, "learning_rate": 0.0001206644597396815, "loss": 1.4782, "step": 12586 }, { "epoch": 0.4507672748760005, "grad_norm": 1.647153615951538, "learning_rate": 0.00012065311094002218, "loss": 1.2063, "step": 12587 }, { "epoch": 0.45080308700556876, "grad_norm": 1.6689027547836304, "learning_rate": 0.00012064176186250189, "loss": 1.7451, "step": 12588 }, { "epoch": 0.4508388991351371, "grad_norm": 1.4815447330474854, "learning_rate": 0.00012063041250727331, "loss": 1.3996, "step": 12589 }, { "epoch": 0.45087471126470535, "grad_norm": 1.4417043924331665, "learning_rate": 0.00012061906287448914, "loss": 1.5646, "step": 12590 }, { "epoch": 0.4509105233942736, "grad_norm": 2.2002885341644287, "learning_rate": 0.00012060771296430209, "loss": 1.3061, "step": 12591 }, { "epoch": 0.45094633552384195, "grad_norm": 1.4272916316986084, "learning_rate": 0.00012059636277686486, "loss": 1.0158, "step": 12592 }, { "epoch": 0.4509821476534102, "grad_norm": 1.6139081716537476, "learning_rate": 0.00012058501231233011, "loss": 1.4949, "step": 12593 }, { "epoch": 0.4510179597829785, "grad_norm": 1.6155054569244385, "learning_rate": 0.00012057366157085058, "loss": 1.3464, "step": 12594 }, { "epoch": 0.45105377191254675, "grad_norm": 1.561390995979309, "learning_rate": 0.00012056231055257896, "loss": 1.3473, "step": 12595 }, { "epoch": 0.4510895840421151, "grad_norm": 1.356062650680542, "learning_rate": 0.000120550959257668, "loss": 1.3973, "step": 12596 }, { "epoch": 0.45112539617168335, "grad_norm": 1.578811526298523, "learning_rate": 0.00012053960768627036, "loss": 1.5595, "step": 12597 }, { "epoch": 0.4511612083012516, "grad_norm": 2.086986780166626, "learning_rate": 0.00012052825583853881, "loss": 1.3671, "step": 12598 }, { "epoch": 0.45119702043081994, "grad_norm": 2.0918164253234863, "learning_rate": 0.00012051690371462608, "loss": 1.1172, "step": 12599 }, { "epoch": 0.4512328325603882, "grad_norm": 1.4624220132827759, "learning_rate": 0.00012050555131468484, "loss": 1.4732, "step": 12600 }, { "epoch": 0.4512686446899565, "grad_norm": 1.6903061866760254, "learning_rate": 0.00012049419863886786, "loss": 1.6673, "step": 12601 }, { "epoch": 0.45130445681952475, "grad_norm": 1.5074726343154907, "learning_rate": 0.00012048284568732791, "loss": 2.0319, "step": 12602 }, { "epoch": 0.45134026894909307, "grad_norm": 2.3268983364105225, "learning_rate": 0.00012047149246021763, "loss": 1.5876, "step": 12603 }, { "epoch": 0.45137608107866134, "grad_norm": 1.8839945793151855, "learning_rate": 0.00012046013895768986, "loss": 1.5132, "step": 12604 }, { "epoch": 0.4514118932082296, "grad_norm": 1.4591853618621826, "learning_rate": 0.0001204487851798973, "loss": 1.6994, "step": 12605 }, { "epoch": 0.45144770533779793, "grad_norm": 1.9590816497802734, "learning_rate": 0.00012043743112699273, "loss": 1.2946, "step": 12606 }, { "epoch": 0.4514835174673662, "grad_norm": 1.7824480533599854, "learning_rate": 0.0001204260767991289, "loss": 1.4297, "step": 12607 }, { "epoch": 0.45151932959693447, "grad_norm": 2.672687292098999, "learning_rate": 0.00012041472219645854, "loss": 1.4044, "step": 12608 }, { "epoch": 0.45155514172650274, "grad_norm": 2.600595474243164, "learning_rate": 0.00012040336731913442, "loss": 1.8836, "step": 12609 }, { "epoch": 0.45159095385607106, "grad_norm": 1.701601266860962, "learning_rate": 0.00012039201216730931, "loss": 1.4913, "step": 12610 }, { "epoch": 0.45162676598563933, "grad_norm": 2.1332130432128906, "learning_rate": 0.00012038065674113598, "loss": 1.8227, "step": 12611 }, { "epoch": 0.4516625781152076, "grad_norm": 1.9110596179962158, "learning_rate": 0.0001203693010407672, "loss": 1.6782, "step": 12612 }, { "epoch": 0.4516983902447759, "grad_norm": 1.5340908765792847, "learning_rate": 0.00012035794506635575, "loss": 1.5475, "step": 12613 }, { "epoch": 0.4517342023743442, "grad_norm": 1.7282644510269165, "learning_rate": 0.0001203465888180544, "loss": 1.8253, "step": 12614 }, { "epoch": 0.45177001450391246, "grad_norm": 2.7363064289093018, "learning_rate": 0.00012033523229601598, "loss": 1.4577, "step": 12615 }, { "epoch": 0.45180582663348073, "grad_norm": 1.7300324440002441, "learning_rate": 0.00012032387550039319, "loss": 1.6123, "step": 12616 }, { "epoch": 0.45184163876304906, "grad_norm": 1.5794569253921509, "learning_rate": 0.00012031251843133891, "loss": 1.5446, "step": 12617 }, { "epoch": 0.4518774508926173, "grad_norm": 2.11045241355896, "learning_rate": 0.00012030116108900589, "loss": 1.5784, "step": 12618 }, { "epoch": 0.4519132630221856, "grad_norm": 1.8082817792892456, "learning_rate": 0.00012028980347354692, "loss": 1.3738, "step": 12619 }, { "epoch": 0.4519490751517539, "grad_norm": 1.3321770429611206, "learning_rate": 0.00012027844558511483, "loss": 1.6105, "step": 12620 }, { "epoch": 0.4519848872813222, "grad_norm": 1.5995198488235474, "learning_rate": 0.00012026708742386239, "loss": 1.3222, "step": 12621 }, { "epoch": 0.45202069941089046, "grad_norm": 2.029466390609741, "learning_rate": 0.00012025572898994246, "loss": 1.4356, "step": 12622 }, { "epoch": 0.4520565115404587, "grad_norm": 1.4476046562194824, "learning_rate": 0.00012024437028350779, "loss": 1.5459, "step": 12623 }, { "epoch": 0.45209232367002705, "grad_norm": 1.6885541677474976, "learning_rate": 0.00012023301130471128, "loss": 1.4401, "step": 12624 }, { "epoch": 0.4521281357995953, "grad_norm": 1.6791876554489136, "learning_rate": 0.00012022165205370565, "loss": 1.6886, "step": 12625 }, { "epoch": 0.4521639479291636, "grad_norm": 1.5566707849502563, "learning_rate": 0.00012021029253064382, "loss": 1.6669, "step": 12626 }, { "epoch": 0.4521997600587319, "grad_norm": 1.7910056114196777, "learning_rate": 0.00012019893273567855, "loss": 1.3655, "step": 12627 }, { "epoch": 0.4522355721883002, "grad_norm": 1.7929489612579346, "learning_rate": 0.00012018757266896267, "loss": 1.563, "step": 12628 }, { "epoch": 0.45227138431786845, "grad_norm": 2.385453701019287, "learning_rate": 0.00012017621233064908, "loss": 2.0782, "step": 12629 }, { "epoch": 0.4523071964474367, "grad_norm": 1.5448579788208008, "learning_rate": 0.00012016485172089056, "loss": 1.3892, "step": 12630 }, { "epoch": 0.45234300857700505, "grad_norm": 2.077671527862549, "learning_rate": 0.00012015349083983998, "loss": 1.3285, "step": 12631 }, { "epoch": 0.4523788207065733, "grad_norm": 1.3240525722503662, "learning_rate": 0.00012014212968765018, "loss": 1.5076, "step": 12632 }, { "epoch": 0.4524146328361416, "grad_norm": 2.3672759532928467, "learning_rate": 0.000120130768264474, "loss": 1.4313, "step": 12633 }, { "epoch": 0.4524504449657099, "grad_norm": 1.4926321506500244, "learning_rate": 0.00012011940657046427, "loss": 1.6316, "step": 12634 }, { "epoch": 0.4524862570952782, "grad_norm": 1.6162370443344116, "learning_rate": 0.00012010804460577395, "loss": 1.5586, "step": 12635 }, { "epoch": 0.45252206922484645, "grad_norm": 1.8206548690795898, "learning_rate": 0.00012009668237055578, "loss": 1.8584, "step": 12636 }, { "epoch": 0.4525578813544147, "grad_norm": 1.6547601222991943, "learning_rate": 0.00012008531986496266, "loss": 1.4219, "step": 12637 }, { "epoch": 0.45259369348398304, "grad_norm": 1.5482653379440308, "learning_rate": 0.0001200739570891475, "loss": 1.4394, "step": 12638 }, { "epoch": 0.4526295056135513, "grad_norm": 1.3440611362457275, "learning_rate": 0.0001200625940432631, "loss": 1.211, "step": 12639 }, { "epoch": 0.4526653177431196, "grad_norm": 1.2386219501495361, "learning_rate": 0.00012005123072746242, "loss": 1.4767, "step": 12640 }, { "epoch": 0.4527011298726879, "grad_norm": 2.0508835315704346, "learning_rate": 0.00012003986714189825, "loss": 1.305, "step": 12641 }, { "epoch": 0.45273694200225617, "grad_norm": 1.6780390739440918, "learning_rate": 0.00012002850328672357, "loss": 1.7344, "step": 12642 }, { "epoch": 0.45277275413182444, "grad_norm": 1.7541413307189941, "learning_rate": 0.00012001713916209117, "loss": 1.2988, "step": 12643 }, { "epoch": 0.4528085662613927, "grad_norm": 4.736975193023682, "learning_rate": 0.00012000577476815402, "loss": 1.3954, "step": 12644 }, { "epoch": 0.45284437839096103, "grad_norm": 1.9038360118865967, "learning_rate": 0.00011999441010506496, "loss": 1.5603, "step": 12645 }, { "epoch": 0.4528801905205293, "grad_norm": 1.3158032894134521, "learning_rate": 0.00011998304517297687, "loss": 1.3335, "step": 12646 }, { "epoch": 0.45291600265009757, "grad_norm": 1.6054844856262207, "learning_rate": 0.00011997167997204272, "loss": 1.5157, "step": 12647 }, { "epoch": 0.4529518147796659, "grad_norm": 2.2230138778686523, "learning_rate": 0.00011996031450241536, "loss": 1.4866, "step": 12648 }, { "epoch": 0.45298762690923416, "grad_norm": 2.4321999549865723, "learning_rate": 0.00011994894876424773, "loss": 1.7621, "step": 12649 }, { "epoch": 0.45302343903880243, "grad_norm": 1.6048284769058228, "learning_rate": 0.00011993758275769273, "loss": 1.4613, "step": 12650 }, { "epoch": 0.4530592511683707, "grad_norm": 1.7765322923660278, "learning_rate": 0.00011992621648290328, "loss": 1.4474, "step": 12651 }, { "epoch": 0.453095063297939, "grad_norm": 1.6691166162490845, "learning_rate": 0.00011991484994003226, "loss": 1.5177, "step": 12652 }, { "epoch": 0.4531308754275073, "grad_norm": 2.022249698638916, "learning_rate": 0.00011990348312923266, "loss": 1.697, "step": 12653 }, { "epoch": 0.45316668755707556, "grad_norm": 2.6226868629455566, "learning_rate": 0.00011989211605065733, "loss": 1.6507, "step": 12654 }, { "epoch": 0.4532024996866439, "grad_norm": 2.3299243450164795, "learning_rate": 0.00011988074870445927, "loss": 1.8215, "step": 12655 }, { "epoch": 0.45323831181621216, "grad_norm": 1.7379266023635864, "learning_rate": 0.00011986938109079133, "loss": 1.4798, "step": 12656 }, { "epoch": 0.4532741239457804, "grad_norm": 1.6245919466018677, "learning_rate": 0.00011985801320980654, "loss": 1.6791, "step": 12657 }, { "epoch": 0.4533099360753487, "grad_norm": 1.2778525352478027, "learning_rate": 0.00011984664506165777, "loss": 1.4197, "step": 12658 }, { "epoch": 0.453345748204917, "grad_norm": 1.747637152671814, "learning_rate": 0.00011983527664649801, "loss": 1.6668, "step": 12659 }, { "epoch": 0.4533815603344853, "grad_norm": 2.747385025024414, "learning_rate": 0.0001198239079644802, "loss": 1.4045, "step": 12660 }, { "epoch": 0.45341737246405356, "grad_norm": 2.079819917678833, "learning_rate": 0.00011981253901575726, "loss": 1.73, "step": 12661 }, { "epoch": 0.4534531845936219, "grad_norm": 1.518641471862793, "learning_rate": 0.00011980116980048217, "loss": 1.6875, "step": 12662 }, { "epoch": 0.45348899672319015, "grad_norm": 1.9608591794967651, "learning_rate": 0.00011978980031880789, "loss": 1.85, "step": 12663 }, { "epoch": 0.4535248088527584, "grad_norm": 1.6720517873764038, "learning_rate": 0.00011977843057088735, "loss": 1.7237, "step": 12664 }, { "epoch": 0.4535606209823267, "grad_norm": 2.0230205059051514, "learning_rate": 0.00011976706055687357, "loss": 1.5775, "step": 12665 }, { "epoch": 0.453596433111895, "grad_norm": 1.5287965536117554, "learning_rate": 0.00011975569027691947, "loss": 1.6407, "step": 12666 }, { "epoch": 0.4536322452414633, "grad_norm": 1.822375774383545, "learning_rate": 0.00011974431973117804, "loss": 1.761, "step": 12667 }, { "epoch": 0.45366805737103155, "grad_norm": 1.4832029342651367, "learning_rate": 0.00011973294891980224, "loss": 1.5349, "step": 12668 }, { "epoch": 0.4537038695005999, "grad_norm": 1.5550483465194702, "learning_rate": 0.00011972157784294508, "loss": 1.4935, "step": 12669 }, { "epoch": 0.45373968163016815, "grad_norm": 1.5525519847869873, "learning_rate": 0.00011971020650075954, "loss": 1.2612, "step": 12670 }, { "epoch": 0.4537754937597364, "grad_norm": 1.824698805809021, "learning_rate": 0.00011969883489339862, "loss": 1.3504, "step": 12671 }, { "epoch": 0.4538113058893047, "grad_norm": 1.8953741788864136, "learning_rate": 0.00011968746302101523, "loss": 1.4381, "step": 12672 }, { "epoch": 0.453847118018873, "grad_norm": 1.4722163677215576, "learning_rate": 0.00011967609088376245, "loss": 1.3717, "step": 12673 }, { "epoch": 0.4538829301484413, "grad_norm": 1.3879319429397583, "learning_rate": 0.00011966471848179324, "loss": 1.4582, "step": 12674 }, { "epoch": 0.45391874227800955, "grad_norm": 2.1239492893218994, "learning_rate": 0.00011965334581526062, "loss": 1.3412, "step": 12675 }, { "epoch": 0.45395455440757787, "grad_norm": 1.5013140439987183, "learning_rate": 0.00011964197288431756, "loss": 1.3925, "step": 12676 }, { "epoch": 0.45399036653714614, "grad_norm": 1.629763126373291, "learning_rate": 0.00011963059968911712, "loss": 1.7531, "step": 12677 }, { "epoch": 0.4540261786667144, "grad_norm": 2.189053773880005, "learning_rate": 0.00011961922622981225, "loss": 1.5956, "step": 12678 }, { "epoch": 0.4540619907962827, "grad_norm": 1.6427063941955566, "learning_rate": 0.00011960785250655604, "loss": 1.6511, "step": 12679 }, { "epoch": 0.454097802925851, "grad_norm": 1.7626343965530396, "learning_rate": 0.00011959647851950145, "loss": 1.4323, "step": 12680 }, { "epoch": 0.45413361505541927, "grad_norm": 1.5623730421066284, "learning_rate": 0.0001195851042688015, "loss": 1.7968, "step": 12681 }, { "epoch": 0.45416942718498754, "grad_norm": 1.481370210647583, "learning_rate": 0.00011957372975460925, "loss": 1.5173, "step": 12682 }, { "epoch": 0.45420523931455586, "grad_norm": 1.105037808418274, "learning_rate": 0.00011956235497707771, "loss": 1.403, "step": 12683 }, { "epoch": 0.45424105144412413, "grad_norm": 1.2726575136184692, "learning_rate": 0.00011955097993635991, "loss": 1.0945, "step": 12684 }, { "epoch": 0.4542768635736924, "grad_norm": 1.2600315809249878, "learning_rate": 0.0001195396046326089, "loss": 1.557, "step": 12685 }, { "epoch": 0.45431267570326067, "grad_norm": 1.5838819742202759, "learning_rate": 0.00011952822906597773, "loss": 1.4059, "step": 12686 }, { "epoch": 0.454348487832829, "grad_norm": 1.760190725326538, "learning_rate": 0.0001195168532366194, "loss": 1.3405, "step": 12687 }, { "epoch": 0.45438429996239726, "grad_norm": 2.3661205768585205, "learning_rate": 0.000119505477144687, "loss": 1.6014, "step": 12688 }, { "epoch": 0.45442011209196553, "grad_norm": 1.6117066144943237, "learning_rate": 0.00011949410079033359, "loss": 1.4842, "step": 12689 }, { "epoch": 0.45445592422153386, "grad_norm": 1.467039942741394, "learning_rate": 0.00011948272417371216, "loss": 1.5621, "step": 12690 }, { "epoch": 0.4544917363511021, "grad_norm": 1.4559403657913208, "learning_rate": 0.00011947134729497583, "loss": 1.6804, "step": 12691 }, { "epoch": 0.4545275484806704, "grad_norm": 1.5294259786605835, "learning_rate": 0.00011945997015427761, "loss": 1.5, "step": 12692 }, { "epoch": 0.45456336061023866, "grad_norm": 1.8137285709381104, "learning_rate": 0.00011944859275177063, "loss": 1.5028, "step": 12693 }, { "epoch": 0.454599172739807, "grad_norm": 1.7047687768936157, "learning_rate": 0.00011943721508760788, "loss": 1.5601, "step": 12694 }, { "epoch": 0.45463498486937526, "grad_norm": 1.5781142711639404, "learning_rate": 0.00011942583716194251, "loss": 1.5172, "step": 12695 }, { "epoch": 0.4546707969989435, "grad_norm": 1.6754029989242554, "learning_rate": 0.00011941445897492755, "loss": 1.654, "step": 12696 }, { "epoch": 0.45470660912851185, "grad_norm": 1.5918335914611816, "learning_rate": 0.0001194030805267161, "loss": 1.4808, "step": 12697 }, { "epoch": 0.4547424212580801, "grad_norm": 1.8302372694015503, "learning_rate": 0.0001193917018174612, "loss": 1.2755, "step": 12698 }, { "epoch": 0.4547782333876484, "grad_norm": 2.5428664684295654, "learning_rate": 0.00011938032284731599, "loss": 1.5935, "step": 12699 }, { "epoch": 0.45481404551721666, "grad_norm": 1.475474238395691, "learning_rate": 0.00011936894361643351, "loss": 1.2114, "step": 12700 }, { "epoch": 0.454849857646785, "grad_norm": 1.447190284729004, "learning_rate": 0.00011935756412496688, "loss": 1.4698, "step": 12701 }, { "epoch": 0.45488566977635325, "grad_norm": 1.8733444213867188, "learning_rate": 0.00011934618437306921, "loss": 1.5448, "step": 12702 }, { "epoch": 0.4549214819059215, "grad_norm": 1.7487825155258179, "learning_rate": 0.00011933480436089357, "loss": 1.3573, "step": 12703 }, { "epoch": 0.45495729403548985, "grad_norm": 1.2866499423980713, "learning_rate": 0.0001193234240885931, "loss": 1.2512, "step": 12704 }, { "epoch": 0.4549931061650581, "grad_norm": 2.3530335426330566, "learning_rate": 0.00011931204355632089, "loss": 1.5614, "step": 12705 }, { "epoch": 0.4550289182946264, "grad_norm": 2.5091681480407715, "learning_rate": 0.00011930066276423003, "loss": 1.6465, "step": 12706 }, { "epoch": 0.45506473042419465, "grad_norm": 1.8246722221374512, "learning_rate": 0.00011928928171247362, "loss": 1.2457, "step": 12707 }, { "epoch": 0.455100542553763, "grad_norm": 1.3693405389785767, "learning_rate": 0.00011927790040120484, "loss": 1.4335, "step": 12708 }, { "epoch": 0.45513635468333125, "grad_norm": 1.6477317810058594, "learning_rate": 0.00011926651883057676, "loss": 1.3098, "step": 12709 }, { "epoch": 0.4551721668128995, "grad_norm": 1.7943549156188965, "learning_rate": 0.00011925513700074253, "loss": 1.4057, "step": 12710 }, { "epoch": 0.45520797894246784, "grad_norm": 1.7328225374221802, "learning_rate": 0.00011924375491185526, "loss": 1.3484, "step": 12711 }, { "epoch": 0.4552437910720361, "grad_norm": 1.3542251586914062, "learning_rate": 0.00011923237256406812, "loss": 1.549, "step": 12712 }, { "epoch": 0.4552796032016044, "grad_norm": 1.7922602891921997, "learning_rate": 0.00011922098995753417, "loss": 1.9181, "step": 12713 }, { "epoch": 0.45531541533117265, "grad_norm": 1.549342155456543, "learning_rate": 0.00011920960709240662, "loss": 1.543, "step": 12714 }, { "epoch": 0.45535122746074097, "grad_norm": 1.754526972770691, "learning_rate": 0.0001191982239688386, "loss": 1.2378, "step": 12715 }, { "epoch": 0.45538703959030924, "grad_norm": 1.7519073486328125, "learning_rate": 0.00011918684058698319, "loss": 1.4704, "step": 12716 }, { "epoch": 0.4554228517198775, "grad_norm": 1.3553789854049683, "learning_rate": 0.00011917545694699365, "loss": 1.3812, "step": 12717 }, { "epoch": 0.45545866384944583, "grad_norm": 1.7538580894470215, "learning_rate": 0.00011916407304902302, "loss": 1.7192, "step": 12718 }, { "epoch": 0.4554944759790141, "grad_norm": 1.9250893592834473, "learning_rate": 0.00011915268889322456, "loss": 1.4597, "step": 12719 }, { "epoch": 0.45553028810858237, "grad_norm": 1.8414658308029175, "learning_rate": 0.00011914130447975131, "loss": 1.8114, "step": 12720 }, { "epoch": 0.45556610023815064, "grad_norm": 1.3821085691452026, "learning_rate": 0.00011912991980875654, "loss": 1.4411, "step": 12721 }, { "epoch": 0.45560191236771896, "grad_norm": 1.5049561262130737, "learning_rate": 0.00011911853488039337, "loss": 1.4983, "step": 12722 }, { "epoch": 0.45563772449728723, "grad_norm": 1.8283472061157227, "learning_rate": 0.00011910714969481498, "loss": 1.5525, "step": 12723 }, { "epoch": 0.4556735366268555, "grad_norm": 1.6375936269760132, "learning_rate": 0.00011909576425217455, "loss": 1.8264, "step": 12724 }, { "epoch": 0.4557093487564238, "grad_norm": 1.530905842781067, "learning_rate": 0.0001190843785526252, "loss": 1.5206, "step": 12725 }, { "epoch": 0.4557451608859921, "grad_norm": 1.5139286518096924, "learning_rate": 0.00011907299259632019, "loss": 1.4437, "step": 12726 }, { "epoch": 0.45578097301556036, "grad_norm": 1.5392321348190308, "learning_rate": 0.00011906160638341264, "loss": 1.43, "step": 12727 }, { "epoch": 0.45581678514512863, "grad_norm": 2.803849458694458, "learning_rate": 0.00011905021991405578, "loss": 1.8596, "step": 12728 }, { "epoch": 0.45585259727469696, "grad_norm": 1.545616865158081, "learning_rate": 0.00011903883318840279, "loss": 1.6796, "step": 12729 }, { "epoch": 0.4558884094042652, "grad_norm": 1.9524319171905518, "learning_rate": 0.00011902744620660686, "loss": 1.559, "step": 12730 }, { "epoch": 0.4559242215338335, "grad_norm": 2.1568334102630615, "learning_rate": 0.00011901605896882116, "loss": 1.5606, "step": 12731 }, { "epoch": 0.4559600336634018, "grad_norm": 2.381507635116577, "learning_rate": 0.00011900467147519893, "loss": 1.2422, "step": 12732 }, { "epoch": 0.4559958457929701, "grad_norm": 1.6769733428955078, "learning_rate": 0.00011899328372589338, "loss": 1.4349, "step": 12733 }, { "epoch": 0.45603165792253836, "grad_norm": 1.7291309833526611, "learning_rate": 0.00011898189572105767, "loss": 1.5192, "step": 12734 }, { "epoch": 0.4560674700521066, "grad_norm": 2.131802797317505, "learning_rate": 0.00011897050746084504, "loss": 1.3411, "step": 12735 }, { "epoch": 0.45610328218167495, "grad_norm": 1.5254218578338623, "learning_rate": 0.0001189591189454087, "loss": 1.3057, "step": 12736 }, { "epoch": 0.4561390943112432, "grad_norm": 1.46796715259552, "learning_rate": 0.00011894773017490189, "loss": 1.2683, "step": 12737 }, { "epoch": 0.4561749064408115, "grad_norm": 1.5670194625854492, "learning_rate": 0.00011893634114947778, "loss": 1.3185, "step": 12738 }, { "epoch": 0.4562107185703798, "grad_norm": 1.6794108152389526, "learning_rate": 0.00011892495186928966, "loss": 1.5493, "step": 12739 }, { "epoch": 0.4562465306999481, "grad_norm": 1.5749014616012573, "learning_rate": 0.00011891356233449069, "loss": 1.4679, "step": 12740 }, { "epoch": 0.45628234282951635, "grad_norm": 1.948190450668335, "learning_rate": 0.0001189021725452342, "loss": 1.5716, "step": 12741 }, { "epoch": 0.4563181549590846, "grad_norm": 1.7379204034805298, "learning_rate": 0.00011889078250167329, "loss": 1.4168, "step": 12742 }, { "epoch": 0.45635396708865295, "grad_norm": 1.6801133155822754, "learning_rate": 0.0001188793922039613, "loss": 1.1718, "step": 12743 }, { "epoch": 0.4563897792182212, "grad_norm": 2.2052829265594482, "learning_rate": 0.00011886800165225143, "loss": 1.5625, "step": 12744 }, { "epoch": 0.4564255913477895, "grad_norm": 1.8127167224884033, "learning_rate": 0.00011885661084669693, "loss": 1.4002, "step": 12745 }, { "epoch": 0.4564614034773578, "grad_norm": 1.4773932695388794, "learning_rate": 0.00011884521978745106, "loss": 1.6552, "step": 12746 }, { "epoch": 0.4564972156069261, "grad_norm": 1.6315429210662842, "learning_rate": 0.00011883382847466706, "loss": 1.6935, "step": 12747 }, { "epoch": 0.45653302773649435, "grad_norm": 2.3406314849853516, "learning_rate": 0.00011882243690849824, "loss": 1.343, "step": 12748 }, { "epoch": 0.4565688398660626, "grad_norm": 1.2826876640319824, "learning_rate": 0.00011881104508909778, "loss": 1.4219, "step": 12749 }, { "epoch": 0.45660465199563094, "grad_norm": 1.8661410808563232, "learning_rate": 0.00011879965301661897, "loss": 1.7659, "step": 12750 }, { "epoch": 0.4566404641251992, "grad_norm": 1.9449056386947632, "learning_rate": 0.00011878826069121505, "loss": 1.4855, "step": 12751 }, { "epoch": 0.4566762762547675, "grad_norm": 1.3876155614852905, "learning_rate": 0.00011877686811303937, "loss": 1.3685, "step": 12752 }, { "epoch": 0.4567120883843358, "grad_norm": 1.7499020099639893, "learning_rate": 0.00011876547528224511, "loss": 1.6676, "step": 12753 }, { "epoch": 0.45674790051390407, "grad_norm": 1.5707976818084717, "learning_rate": 0.00011875408219898561, "loss": 1.6834, "step": 12754 }, { "epoch": 0.45678371264347234, "grad_norm": 1.6355860233306885, "learning_rate": 0.00011874268886341409, "loss": 1.6243, "step": 12755 }, { "epoch": 0.4568195247730406, "grad_norm": 1.9601993560791016, "learning_rate": 0.00011873129527568388, "loss": 1.6477, "step": 12756 }, { "epoch": 0.45685533690260893, "grad_norm": 1.5781079530715942, "learning_rate": 0.00011871990143594827, "loss": 1.543, "step": 12757 }, { "epoch": 0.4568911490321772, "grad_norm": 2.1427834033966064, "learning_rate": 0.00011870850734436054, "loss": 1.5517, "step": 12758 }, { "epoch": 0.45692696116174547, "grad_norm": 1.4001954793930054, "learning_rate": 0.00011869711300107398, "loss": 1.3041, "step": 12759 }, { "epoch": 0.4569627732913138, "grad_norm": 1.4365400075912476, "learning_rate": 0.00011868571840624185, "loss": 1.5515, "step": 12760 }, { "epoch": 0.45699858542088206, "grad_norm": 1.820007562637329, "learning_rate": 0.0001186743235600175, "loss": 1.6383, "step": 12761 }, { "epoch": 0.45703439755045033, "grad_norm": 2.0829074382781982, "learning_rate": 0.0001186629284625542, "loss": 1.684, "step": 12762 }, { "epoch": 0.4570702096800186, "grad_norm": 2.3919122219085693, "learning_rate": 0.00011865153311400529, "loss": 1.4495, "step": 12763 }, { "epoch": 0.4571060218095869, "grad_norm": 1.607649564743042, "learning_rate": 0.00011864013751452405, "loss": 1.6239, "step": 12764 }, { "epoch": 0.4571418339391552, "grad_norm": 1.874072551727295, "learning_rate": 0.00011862874166426381, "loss": 1.3917, "step": 12765 }, { "epoch": 0.45717764606872346, "grad_norm": 1.8643479347229004, "learning_rate": 0.00011861734556337787, "loss": 1.5116, "step": 12766 }, { "epoch": 0.4572134581982918, "grad_norm": 1.378171682357788, "learning_rate": 0.00011860594921201958, "loss": 1.5747, "step": 12767 }, { "epoch": 0.45724927032786006, "grad_norm": 2.0554890632629395, "learning_rate": 0.00011859455261034225, "loss": 1.5593, "step": 12768 }, { "epoch": 0.4572850824574283, "grad_norm": 1.3280192613601685, "learning_rate": 0.00011858315575849914, "loss": 1.3724, "step": 12769 }, { "epoch": 0.4573208945869966, "grad_norm": 1.340054988861084, "learning_rate": 0.00011857175865664372, "loss": 1.5089, "step": 12770 }, { "epoch": 0.4573567067165649, "grad_norm": 2.0556211471557617, "learning_rate": 0.00011856036130492917, "loss": 1.8506, "step": 12771 }, { "epoch": 0.4573925188461332, "grad_norm": 1.7079956531524658, "learning_rate": 0.00011854896370350894, "loss": 1.5426, "step": 12772 }, { "epoch": 0.45742833097570146, "grad_norm": 1.4690487384796143, "learning_rate": 0.00011853756585253633, "loss": 1.3461, "step": 12773 }, { "epoch": 0.4574641431052697, "grad_norm": 1.5755233764648438, "learning_rate": 0.00011852616775216467, "loss": 1.6978, "step": 12774 }, { "epoch": 0.45749995523483805, "grad_norm": 1.4344228506088257, "learning_rate": 0.00011851476940254733, "loss": 1.5199, "step": 12775 }, { "epoch": 0.4575357673644063, "grad_norm": 1.373246431350708, "learning_rate": 0.00011850337080383764, "loss": 1.5406, "step": 12776 }, { "epoch": 0.4575715794939746, "grad_norm": 1.866769790649414, "learning_rate": 0.000118491971956189, "loss": 1.3924, "step": 12777 }, { "epoch": 0.4576073916235429, "grad_norm": 1.9502559900283813, "learning_rate": 0.00011848057285975467, "loss": 1.4118, "step": 12778 }, { "epoch": 0.4576432037531112, "grad_norm": 1.2907614707946777, "learning_rate": 0.00011846917351468811, "loss": 1.06, "step": 12779 }, { "epoch": 0.45767901588267945, "grad_norm": 1.657185673713684, "learning_rate": 0.00011845777392114263, "loss": 1.4635, "step": 12780 }, { "epoch": 0.4577148280122477, "grad_norm": 2.1318721771240234, "learning_rate": 0.00011844637407927161, "loss": 1.3132, "step": 12781 }, { "epoch": 0.45775064014181605, "grad_norm": 1.5741397142410278, "learning_rate": 0.00011843497398922842, "loss": 1.3551, "step": 12782 }, { "epoch": 0.4577864522713843, "grad_norm": 1.3207534551620483, "learning_rate": 0.00011842357365116645, "loss": 1.7398, "step": 12783 }, { "epoch": 0.4578222644009526, "grad_norm": 1.8568518161773682, "learning_rate": 0.00011841217306523904, "loss": 1.7068, "step": 12784 }, { "epoch": 0.4578580765305209, "grad_norm": 1.3285858631134033, "learning_rate": 0.00011840077223159965, "loss": 1.676, "step": 12785 }, { "epoch": 0.4578938886600892, "grad_norm": 2.2445781230926514, "learning_rate": 0.00011838937115040154, "loss": 1.5313, "step": 12786 }, { "epoch": 0.45792970078965745, "grad_norm": 1.4467315673828125, "learning_rate": 0.00011837796982179817, "loss": 1.3382, "step": 12787 }, { "epoch": 0.4579655129192257, "grad_norm": 1.7218992710113525, "learning_rate": 0.00011836656824594295, "loss": 1.685, "step": 12788 }, { "epoch": 0.45800132504879404, "grad_norm": 1.835891604423523, "learning_rate": 0.0001183551664229892, "loss": 1.7658, "step": 12789 }, { "epoch": 0.4580371371783623, "grad_norm": 1.723070740699768, "learning_rate": 0.0001183437643530904, "loss": 1.7154, "step": 12790 }, { "epoch": 0.4580729493079306, "grad_norm": 2.18641996383667, "learning_rate": 0.00011833236203639987, "loss": 1.5002, "step": 12791 }, { "epoch": 0.4581087614374989, "grad_norm": 1.8997712135314941, "learning_rate": 0.00011832095947307111, "loss": 1.8594, "step": 12792 }, { "epoch": 0.45814457356706717, "grad_norm": 2.286189317703247, "learning_rate": 0.00011830955666325748, "loss": 1.2922, "step": 12793 }, { "epoch": 0.45818038569663544, "grad_norm": 1.3605753183364868, "learning_rate": 0.00011829815360711234, "loss": 1.4984, "step": 12794 }, { "epoch": 0.4582161978262037, "grad_norm": 1.8744243383407593, "learning_rate": 0.00011828675030478915, "loss": 1.5561, "step": 12795 }, { "epoch": 0.45825200995577203, "grad_norm": 1.505820870399475, "learning_rate": 0.00011827534675644134, "loss": 1.3156, "step": 12796 }, { "epoch": 0.4582878220853403, "grad_norm": 1.856209397315979, "learning_rate": 0.00011826394296222229, "loss": 1.4686, "step": 12797 }, { "epoch": 0.45832363421490857, "grad_norm": 1.7676700353622437, "learning_rate": 0.00011825253892228547, "loss": 1.4413, "step": 12798 }, { "epoch": 0.4583594463444769, "grad_norm": 2.2490038871765137, "learning_rate": 0.00011824113463678427, "loss": 1.6963, "step": 12799 }, { "epoch": 0.45839525847404516, "grad_norm": 1.618058204650879, "learning_rate": 0.00011822973010587213, "loss": 1.6653, "step": 12800 }, { "epoch": 0.45843107060361343, "grad_norm": 1.5924299955368042, "learning_rate": 0.0001182183253297025, "loss": 1.3864, "step": 12801 }, { "epoch": 0.4584668827331817, "grad_norm": 2.89251971244812, "learning_rate": 0.00011820692030842879, "loss": 1.3747, "step": 12802 }, { "epoch": 0.45850269486275, "grad_norm": 1.7772977352142334, "learning_rate": 0.00011819551504220447, "loss": 1.5318, "step": 12803 }, { "epoch": 0.4585385069923183, "grad_norm": 1.6167027950286865, "learning_rate": 0.00011818410953118296, "loss": 1.3861, "step": 12804 }, { "epoch": 0.45857431912188656, "grad_norm": 1.2884777784347534, "learning_rate": 0.0001181727037755177, "loss": 0.8727, "step": 12805 }, { "epoch": 0.4586101312514549, "grad_norm": 1.5280265808105469, "learning_rate": 0.00011816129777536216, "loss": 1.255, "step": 12806 }, { "epoch": 0.45864594338102316, "grad_norm": 1.8953804969787598, "learning_rate": 0.00011814989153086977, "loss": 1.5201, "step": 12807 }, { "epoch": 0.4586817555105914, "grad_norm": 1.4978008270263672, "learning_rate": 0.00011813848504219403, "loss": 1.3398, "step": 12808 }, { "epoch": 0.4587175676401597, "grad_norm": 1.7780437469482422, "learning_rate": 0.00011812707830948835, "loss": 1.6133, "step": 12809 }, { "epoch": 0.458753379769728, "grad_norm": 1.5132312774658203, "learning_rate": 0.0001181156713329062, "loss": 1.4327, "step": 12810 }, { "epoch": 0.4587891918992963, "grad_norm": 1.6807321310043335, "learning_rate": 0.0001181042641126011, "loss": 1.4845, "step": 12811 }, { "epoch": 0.45882500402886456, "grad_norm": 1.6463050842285156, "learning_rate": 0.00011809285664872645, "loss": 1.3477, "step": 12812 }, { "epoch": 0.4588608161584329, "grad_norm": 1.670136570930481, "learning_rate": 0.00011808144894143575, "loss": 1.5761, "step": 12813 }, { "epoch": 0.45889662828800115, "grad_norm": 1.6936041116714478, "learning_rate": 0.00011807004099088251, "loss": 1.4741, "step": 12814 }, { "epoch": 0.4589324404175694, "grad_norm": 1.5984362363815308, "learning_rate": 0.00011805863279722014, "loss": 1.6037, "step": 12815 }, { "epoch": 0.4589682525471377, "grad_norm": 1.9859976768493652, "learning_rate": 0.00011804722436060218, "loss": 1.6921, "step": 12816 }, { "epoch": 0.459004064676706, "grad_norm": 1.768145203590393, "learning_rate": 0.00011803581568118207, "loss": 1.322, "step": 12817 }, { "epoch": 0.4590398768062743, "grad_norm": 1.2715734243392944, "learning_rate": 0.00011802440675911335, "loss": 1.4228, "step": 12818 }, { "epoch": 0.45907568893584255, "grad_norm": 1.2998805046081543, "learning_rate": 0.00011801299759454947, "loss": 1.2947, "step": 12819 }, { "epoch": 0.4591115010654109, "grad_norm": 2.153787612915039, "learning_rate": 0.00011800158818764395, "loss": 1.8239, "step": 12820 }, { "epoch": 0.45914731319497915, "grad_norm": 1.4369038343429565, "learning_rate": 0.0001179901785385503, "loss": 1.4372, "step": 12821 }, { "epoch": 0.4591831253245474, "grad_norm": 2.008143901824951, "learning_rate": 0.00011797876864742198, "loss": 1.6738, "step": 12822 }, { "epoch": 0.4592189374541157, "grad_norm": 1.253076434135437, "learning_rate": 0.00011796735851441254, "loss": 1.2106, "step": 12823 }, { "epoch": 0.459254749583684, "grad_norm": 1.9890899658203125, "learning_rate": 0.00011795594813967543, "loss": 1.865, "step": 12824 }, { "epoch": 0.4592905617132523, "grad_norm": 1.4723730087280273, "learning_rate": 0.00011794453752336425, "loss": 1.3423, "step": 12825 }, { "epoch": 0.45932637384282055, "grad_norm": 3.3311033248901367, "learning_rate": 0.00011793312666563241, "loss": 1.5936, "step": 12826 }, { "epoch": 0.45936218597238887, "grad_norm": 1.470004916191101, "learning_rate": 0.00011792171556663353, "loss": 1.1274, "step": 12827 }, { "epoch": 0.45939799810195714, "grad_norm": 1.7341986894607544, "learning_rate": 0.00011791030422652105, "loss": 1.1953, "step": 12828 }, { "epoch": 0.4594338102315254, "grad_norm": 1.5090793371200562, "learning_rate": 0.00011789889264544855, "loss": 1.3247, "step": 12829 }, { "epoch": 0.4594696223610937, "grad_norm": 1.5003623962402344, "learning_rate": 0.00011788748082356955, "loss": 1.3318, "step": 12830 }, { "epoch": 0.459505434490662, "grad_norm": 2.2069389820098877, "learning_rate": 0.00011787606876103753, "loss": 1.6227, "step": 12831 }, { "epoch": 0.45954124662023027, "grad_norm": 1.5146262645721436, "learning_rate": 0.00011786465645800609, "loss": 1.5597, "step": 12832 }, { "epoch": 0.45957705874979854, "grad_norm": 1.5742777585983276, "learning_rate": 0.00011785324391462873, "loss": 1.4421, "step": 12833 }, { "epoch": 0.45961287087936686, "grad_norm": 1.3737620115280151, "learning_rate": 0.000117841831131059, "loss": 1.6641, "step": 12834 }, { "epoch": 0.45964868300893513, "grad_norm": 1.7888473272323608, "learning_rate": 0.00011783041810745045, "loss": 1.2693, "step": 12835 }, { "epoch": 0.4596844951385034, "grad_norm": 1.42727530002594, "learning_rate": 0.00011781900484395665, "loss": 1.5367, "step": 12836 }, { "epoch": 0.45972030726807167, "grad_norm": 1.4414780139923096, "learning_rate": 0.00011780759134073107, "loss": 2.0235, "step": 12837 }, { "epoch": 0.45975611939764, "grad_norm": 1.4907771348953247, "learning_rate": 0.00011779617759792738, "loss": 1.4951, "step": 12838 }, { "epoch": 0.45979193152720826, "grad_norm": 1.888153314590454, "learning_rate": 0.00011778476361569903, "loss": 1.3646, "step": 12839 }, { "epoch": 0.45982774365677653, "grad_norm": 1.6788287162780762, "learning_rate": 0.00011777334939419966, "loss": 1.3867, "step": 12840 }, { "epoch": 0.45986355578634486, "grad_norm": 1.3962904214859009, "learning_rate": 0.00011776193493358278, "loss": 1.6541, "step": 12841 }, { "epoch": 0.4598993679159131, "grad_norm": 1.7222188711166382, "learning_rate": 0.00011775052023400197, "loss": 1.5073, "step": 12842 }, { "epoch": 0.4599351800454814, "grad_norm": 1.299524188041687, "learning_rate": 0.0001177391052956108, "loss": 1.1971, "step": 12843 }, { "epoch": 0.45997099217504966, "grad_norm": 1.7616795301437378, "learning_rate": 0.00011772769011856286, "loss": 1.499, "step": 12844 }, { "epoch": 0.460006804304618, "grad_norm": 1.8444628715515137, "learning_rate": 0.00011771627470301174, "loss": 1.5539, "step": 12845 }, { "epoch": 0.46004261643418626, "grad_norm": 1.4378998279571533, "learning_rate": 0.00011770485904911099, "loss": 1.5755, "step": 12846 }, { "epoch": 0.4600784285637545, "grad_norm": 1.6388529539108276, "learning_rate": 0.0001176934431570142, "loss": 1.4707, "step": 12847 }, { "epoch": 0.46011424069332285, "grad_norm": 1.789455771446228, "learning_rate": 0.00011768202702687492, "loss": 1.3956, "step": 12848 }, { "epoch": 0.4601500528228911, "grad_norm": 1.9952750205993652, "learning_rate": 0.00011767061065884682, "loss": 1.7959, "step": 12849 }, { "epoch": 0.4601858649524594, "grad_norm": 2.108474016189575, "learning_rate": 0.00011765919405308341, "loss": 1.4997, "step": 12850 }, { "epoch": 0.46022167708202766, "grad_norm": 1.9382110834121704, "learning_rate": 0.00011764777720973835, "loss": 1.4933, "step": 12851 }, { "epoch": 0.460257489211596, "grad_norm": 1.7366992235183716, "learning_rate": 0.00011763636012896518, "loss": 1.6449, "step": 12852 }, { "epoch": 0.46029330134116425, "grad_norm": 1.8171309232711792, "learning_rate": 0.00011762494281091756, "loss": 1.6787, "step": 12853 }, { "epoch": 0.4603291134707325, "grad_norm": 1.760102391242981, "learning_rate": 0.00011761352525574905, "loss": 1.2648, "step": 12854 }, { "epoch": 0.46036492560030084, "grad_norm": 2.294532537460327, "learning_rate": 0.00011760210746361329, "loss": 1.6493, "step": 12855 }, { "epoch": 0.4604007377298691, "grad_norm": 1.5895004272460938, "learning_rate": 0.00011759068943466389, "loss": 1.3155, "step": 12856 }, { "epoch": 0.4604365498594374, "grad_norm": 1.6277079582214355, "learning_rate": 0.00011757927116905442, "loss": 1.4816, "step": 12857 }, { "epoch": 0.46047236198900565, "grad_norm": 1.8704755306243896, "learning_rate": 0.00011756785266693857, "loss": 1.4445, "step": 12858 }, { "epoch": 0.460508174118574, "grad_norm": 1.737906813621521, "learning_rate": 0.00011755643392846991, "loss": 1.7793, "step": 12859 }, { "epoch": 0.46054398624814225, "grad_norm": 1.859326720237732, "learning_rate": 0.00011754501495380209, "loss": 1.6184, "step": 12860 }, { "epoch": 0.4605797983777105, "grad_norm": 1.8571624755859375, "learning_rate": 0.00011753359574308869, "loss": 1.5855, "step": 12861 }, { "epoch": 0.46061561050727884, "grad_norm": 1.6223071813583374, "learning_rate": 0.00011752217629648341, "loss": 1.3289, "step": 12862 }, { "epoch": 0.4606514226368471, "grad_norm": 1.7638866901397705, "learning_rate": 0.00011751075661413982, "loss": 1.5636, "step": 12863 }, { "epoch": 0.4606872347664154, "grad_norm": 1.5971314907073975, "learning_rate": 0.00011749933669621161, "loss": 1.6486, "step": 12864 }, { "epoch": 0.46072304689598365, "grad_norm": 1.6933928728103638, "learning_rate": 0.0001174879165428524, "loss": 1.8318, "step": 12865 }, { "epoch": 0.46075885902555197, "grad_norm": 1.8240853548049927, "learning_rate": 0.00011747649615421581, "loss": 1.5988, "step": 12866 }, { "epoch": 0.46079467115512024, "grad_norm": 1.140206217765808, "learning_rate": 0.00011746507553045552, "loss": 1.3376, "step": 12867 }, { "epoch": 0.4608304832846885, "grad_norm": 1.5556854009628296, "learning_rate": 0.00011745365467172516, "loss": 1.4646, "step": 12868 }, { "epoch": 0.46086629541425683, "grad_norm": 2.0136656761169434, "learning_rate": 0.00011744223357817841, "loss": 1.8497, "step": 12869 }, { "epoch": 0.4609021075438251, "grad_norm": 2.2033441066741943, "learning_rate": 0.00011743081224996888, "loss": 1.5519, "step": 12870 }, { "epoch": 0.46093791967339337, "grad_norm": 1.670477032661438, "learning_rate": 0.00011741939068725027, "loss": 1.5283, "step": 12871 }, { "epoch": 0.46097373180296164, "grad_norm": 2.0751805305480957, "learning_rate": 0.00011740796889017623, "loss": 1.4366, "step": 12872 }, { "epoch": 0.46100954393252996, "grad_norm": 1.6487994194030762, "learning_rate": 0.0001173965468589004, "loss": 1.3776, "step": 12873 }, { "epoch": 0.46104535606209823, "grad_norm": 1.5400201082229614, "learning_rate": 0.0001173851245935765, "loss": 1.4772, "step": 12874 }, { "epoch": 0.4610811681916665, "grad_norm": 1.987924337387085, "learning_rate": 0.00011737370209435816, "loss": 1.3146, "step": 12875 }, { "epoch": 0.4611169803212348, "grad_norm": 2.468710422515869, "learning_rate": 0.00011736227936139908, "loss": 1.2949, "step": 12876 }, { "epoch": 0.4611527924508031, "grad_norm": 1.6486579179763794, "learning_rate": 0.00011735085639485291, "loss": 1.462, "step": 12877 }, { "epoch": 0.46118860458037136, "grad_norm": 1.8904602527618408, "learning_rate": 0.00011733943319487337, "loss": 1.6574, "step": 12878 }, { "epoch": 0.46122441670993963, "grad_norm": 1.5434259176254272, "learning_rate": 0.00011732800976161408, "loss": 1.4993, "step": 12879 }, { "epoch": 0.46126022883950796, "grad_norm": 1.8802088499069214, "learning_rate": 0.00011731658609522881, "loss": 1.2326, "step": 12880 }, { "epoch": 0.4612960409690762, "grad_norm": 1.7803751230239868, "learning_rate": 0.0001173051621958712, "loss": 2.1058, "step": 12881 }, { "epoch": 0.4613318530986445, "grad_norm": 1.6606054306030273, "learning_rate": 0.00011729373806369499, "loss": 1.6868, "step": 12882 }, { "epoch": 0.4613676652282128, "grad_norm": 1.609900951385498, "learning_rate": 0.0001172823136988538, "loss": 1.2621, "step": 12883 }, { "epoch": 0.4614034773577811, "grad_norm": 2.8169360160827637, "learning_rate": 0.00011727088910150137, "loss": 1.2055, "step": 12884 }, { "epoch": 0.46143928948734936, "grad_norm": 1.9346868991851807, "learning_rate": 0.00011725946427179142, "loss": 1.8939, "step": 12885 }, { "epoch": 0.4614751016169176, "grad_norm": 1.8201242685317993, "learning_rate": 0.00011724803920987761, "loss": 1.7242, "step": 12886 }, { "epoch": 0.46151091374648595, "grad_norm": 1.8943721055984497, "learning_rate": 0.00011723661391591371, "loss": 1.7006, "step": 12887 }, { "epoch": 0.4615467258760542, "grad_norm": 1.6095993518829346, "learning_rate": 0.00011722518839005341, "loss": 1.4559, "step": 12888 }, { "epoch": 0.4615825380056225, "grad_norm": 1.338904619216919, "learning_rate": 0.00011721376263245041, "loss": 1.6563, "step": 12889 }, { "epoch": 0.4616183501351908, "grad_norm": 1.563049554824829, "learning_rate": 0.00011720233664325846, "loss": 1.2904, "step": 12890 }, { "epoch": 0.4616541622647591, "grad_norm": 1.619492530822754, "learning_rate": 0.00011719091042263124, "loss": 1.5699, "step": 12891 }, { "epoch": 0.46168997439432735, "grad_norm": 1.8848820924758911, "learning_rate": 0.00011717948397072246, "loss": 1.7522, "step": 12892 }, { "epoch": 0.4617257865238956, "grad_norm": 1.626729130744934, "learning_rate": 0.00011716805728768593, "loss": 1.4691, "step": 12893 }, { "epoch": 0.46176159865346394, "grad_norm": 2.1712372303009033, "learning_rate": 0.00011715663037367532, "loss": 1.5208, "step": 12894 }, { "epoch": 0.4617974107830322, "grad_norm": 1.8824517726898193, "learning_rate": 0.00011714520322884439, "loss": 1.3719, "step": 12895 }, { "epoch": 0.4618332229126005, "grad_norm": 2.1174161434173584, "learning_rate": 0.00011713377585334684, "loss": 1.4144, "step": 12896 }, { "epoch": 0.4618690350421688, "grad_norm": 1.8503297567367554, "learning_rate": 0.00011712234824733644, "loss": 1.5521, "step": 12897 }, { "epoch": 0.4619048471717371, "grad_norm": 1.4610905647277832, "learning_rate": 0.00011711092041096693, "loss": 1.3756, "step": 12898 }, { "epoch": 0.46194065930130535, "grad_norm": 2.196420907974243, "learning_rate": 0.0001170994923443921, "loss": 1.6765, "step": 12899 }, { "epoch": 0.4619764714308736, "grad_norm": 1.5925959348678589, "learning_rate": 0.00011708806404776563, "loss": 1.5467, "step": 12900 }, { "epoch": 0.46201228356044194, "grad_norm": 1.4882124662399292, "learning_rate": 0.00011707663552124128, "loss": 1.7759, "step": 12901 }, { "epoch": 0.4620480956900102, "grad_norm": 1.9978431463241577, "learning_rate": 0.00011706520676497285, "loss": 1.7564, "step": 12902 }, { "epoch": 0.4620839078195785, "grad_norm": 1.8712289333343506, "learning_rate": 0.00011705377777911406, "loss": 1.5388, "step": 12903 }, { "epoch": 0.4621197199491468, "grad_norm": 3.0363831520080566, "learning_rate": 0.0001170423485638187, "loss": 1.5825, "step": 12904 }, { "epoch": 0.46215553207871507, "grad_norm": 1.3153165578842163, "learning_rate": 0.00011703091911924051, "loss": 1.306, "step": 12905 }, { "epoch": 0.46219134420828334, "grad_norm": 1.8656954765319824, "learning_rate": 0.0001170194894455333, "loss": 1.3723, "step": 12906 }, { "epoch": 0.4622271563378516, "grad_norm": 1.8745927810668945, "learning_rate": 0.0001170080595428508, "loss": 1.4533, "step": 12907 }, { "epoch": 0.46226296846741993, "grad_norm": 1.440604329109192, "learning_rate": 0.00011699662941134679, "loss": 1.3826, "step": 12908 }, { "epoch": 0.4622987805969882, "grad_norm": 1.6274751424789429, "learning_rate": 0.00011698519905117507, "loss": 1.3327, "step": 12909 }, { "epoch": 0.46233459272655647, "grad_norm": 1.5062453746795654, "learning_rate": 0.00011697376846248937, "loss": 1.5934, "step": 12910 }, { "epoch": 0.4623704048561248, "grad_norm": 1.5800942182540894, "learning_rate": 0.00011696233764544353, "loss": 1.3237, "step": 12911 }, { "epoch": 0.46240621698569306, "grad_norm": 1.7886601686477661, "learning_rate": 0.00011695090660019132, "loss": 1.512, "step": 12912 }, { "epoch": 0.46244202911526133, "grad_norm": 1.6986116170883179, "learning_rate": 0.00011693947532688653, "loss": 1.5372, "step": 12913 }, { "epoch": 0.4624778412448296, "grad_norm": 1.8401761054992676, "learning_rate": 0.00011692804382568294, "loss": 1.5287, "step": 12914 }, { "epoch": 0.4625136533743979, "grad_norm": 1.7199172973632812, "learning_rate": 0.00011691661209673437, "loss": 1.5185, "step": 12915 }, { "epoch": 0.4625494655039662, "grad_norm": 1.6660505533218384, "learning_rate": 0.00011690518014019458, "loss": 1.3418, "step": 12916 }, { "epoch": 0.46258527763353446, "grad_norm": 2.050318956375122, "learning_rate": 0.00011689374795621744, "loss": 1.4391, "step": 12917 }, { "epoch": 0.4626210897631028, "grad_norm": 1.6393921375274658, "learning_rate": 0.00011688231554495668, "loss": 1.4133, "step": 12918 }, { "epoch": 0.46265690189267106, "grad_norm": 1.5814021825790405, "learning_rate": 0.00011687088290656613, "loss": 1.2826, "step": 12919 }, { "epoch": 0.4626927140222393, "grad_norm": 1.5501033067703247, "learning_rate": 0.00011685945004119965, "loss": 1.6777, "step": 12920 }, { "epoch": 0.4627285261518076, "grad_norm": 1.8156019449234009, "learning_rate": 0.00011684801694901099, "loss": 1.4462, "step": 12921 }, { "epoch": 0.4627643382813759, "grad_norm": 1.7225736379623413, "learning_rate": 0.00011683658363015402, "loss": 1.2303, "step": 12922 }, { "epoch": 0.4628001504109442, "grad_norm": 1.728131890296936, "learning_rate": 0.0001168251500847825, "loss": 1.4446, "step": 12923 }, { "epoch": 0.46283596254051246, "grad_norm": 2.56362247467041, "learning_rate": 0.00011681371631305032, "loss": 1.6334, "step": 12924 }, { "epoch": 0.4628717746700808, "grad_norm": 2.016491413116455, "learning_rate": 0.00011680228231511123, "loss": 1.3181, "step": 12925 }, { "epoch": 0.46290758679964905, "grad_norm": 1.6673835515975952, "learning_rate": 0.00011679084809111915, "loss": 1.51, "step": 12926 }, { "epoch": 0.4629433989292173, "grad_norm": 1.5395658016204834, "learning_rate": 0.00011677941364122787, "loss": 1.3948, "step": 12927 }, { "epoch": 0.4629792110587856, "grad_norm": 1.4805071353912354, "learning_rate": 0.0001167679789655912, "loss": 1.3565, "step": 12928 }, { "epoch": 0.4630150231883539, "grad_norm": 1.8518954515457153, "learning_rate": 0.00011675654406436301, "loss": 1.2927, "step": 12929 }, { "epoch": 0.4630508353179222, "grad_norm": 2.3132810592651367, "learning_rate": 0.00011674510893769713, "loss": 1.5319, "step": 12930 }, { "epoch": 0.46308664744749045, "grad_norm": 1.4110107421875, "learning_rate": 0.00011673367358574741, "loss": 0.9963, "step": 12931 }, { "epoch": 0.4631224595770588, "grad_norm": 1.7076191902160645, "learning_rate": 0.00011672223800866768, "loss": 1.7017, "step": 12932 }, { "epoch": 0.46315827170662704, "grad_norm": 2.103776693344116, "learning_rate": 0.00011671080220661183, "loss": 1.4991, "step": 12933 }, { "epoch": 0.4631940838361953, "grad_norm": 1.6993529796600342, "learning_rate": 0.00011669936617973367, "loss": 1.4374, "step": 12934 }, { "epoch": 0.4632298959657636, "grad_norm": 1.5589717626571655, "learning_rate": 0.00011668792992818714, "loss": 1.3671, "step": 12935 }, { "epoch": 0.4632657080953319, "grad_norm": 1.5046939849853516, "learning_rate": 0.000116676493452126, "loss": 1.6337, "step": 12936 }, { "epoch": 0.4633015202249002, "grad_norm": 1.6186277866363525, "learning_rate": 0.00011666505675170413, "loss": 1.6858, "step": 12937 }, { "epoch": 0.46333733235446845, "grad_norm": 2.007828950881958, "learning_rate": 0.00011665361982707543, "loss": 1.6257, "step": 12938 }, { "epoch": 0.46337314448403677, "grad_norm": 1.3552885055541992, "learning_rate": 0.00011664218267839375, "loss": 1.3457, "step": 12939 }, { "epoch": 0.46340895661360504, "grad_norm": 2.314358949661255, "learning_rate": 0.000116630745305813, "loss": 1.3535, "step": 12940 }, { "epoch": 0.4634447687431733, "grad_norm": 1.718798279762268, "learning_rate": 0.00011661930770948699, "loss": 1.4442, "step": 12941 }, { "epoch": 0.4634805808727416, "grad_norm": 1.6392550468444824, "learning_rate": 0.00011660786988956964, "loss": 1.3206, "step": 12942 }, { "epoch": 0.4635163930023099, "grad_norm": 1.5570787191390991, "learning_rate": 0.00011659643184621485, "loss": 1.3863, "step": 12943 }, { "epoch": 0.46355220513187817, "grad_norm": 2.412844181060791, "learning_rate": 0.00011658499357957646, "loss": 1.6282, "step": 12944 }, { "epoch": 0.46358801726144644, "grad_norm": 1.7498209476470947, "learning_rate": 0.00011657355508980836, "loss": 1.4421, "step": 12945 }, { "epoch": 0.46362382939101476, "grad_norm": 1.9625868797302246, "learning_rate": 0.00011656211637706449, "loss": 1.8111, "step": 12946 }, { "epoch": 0.46365964152058303, "grad_norm": 2.1074130535125732, "learning_rate": 0.00011655067744149865, "loss": 1.4252, "step": 12947 }, { "epoch": 0.4636954536501513, "grad_norm": 1.5764514207839966, "learning_rate": 0.00011653923828326485, "loss": 1.2029, "step": 12948 }, { "epoch": 0.46373126577971957, "grad_norm": 1.5708736181259155, "learning_rate": 0.0001165277989025169, "loss": 1.3832, "step": 12949 }, { "epoch": 0.4637670779092879, "grad_norm": 1.6243404150009155, "learning_rate": 0.00011651635929940874, "loss": 1.2595, "step": 12950 }, { "epoch": 0.46380289003885616, "grad_norm": 2.1377639770507812, "learning_rate": 0.00011650491947409427, "loss": 1.7661, "step": 12951 }, { "epoch": 0.46383870216842443, "grad_norm": 1.541272759437561, "learning_rate": 0.00011649347942672741, "loss": 1.5135, "step": 12952 }, { "epoch": 0.46387451429799276, "grad_norm": 2.0266079902648926, "learning_rate": 0.00011648203915746208, "loss": 1.7173, "step": 12953 }, { "epoch": 0.463910326427561, "grad_norm": 1.9679988622665405, "learning_rate": 0.00011647059866645213, "loss": 1.5395, "step": 12954 }, { "epoch": 0.4639461385571293, "grad_norm": 1.322680950164795, "learning_rate": 0.00011645915795385154, "loss": 1.603, "step": 12955 }, { "epoch": 0.46398195068669756, "grad_norm": 1.8947259187698364, "learning_rate": 0.0001164477170198142, "loss": 1.292, "step": 12956 }, { "epoch": 0.4640177628162659, "grad_norm": 1.686653733253479, "learning_rate": 0.00011643627586449406, "loss": 1.6056, "step": 12957 }, { "epoch": 0.46405357494583416, "grad_norm": 2.2820146083831787, "learning_rate": 0.000116424834488045, "loss": 1.4883, "step": 12958 }, { "epoch": 0.4640893870754024, "grad_norm": 2.1820619106292725, "learning_rate": 0.00011641339289062101, "loss": 1.6662, "step": 12959 }, { "epoch": 0.46412519920497075, "grad_norm": 2.1300809383392334, "learning_rate": 0.00011640195107237596, "loss": 1.3605, "step": 12960 }, { "epoch": 0.464161011334539, "grad_norm": 1.3338943719863892, "learning_rate": 0.00011639050903346387, "loss": 1.4019, "step": 12961 }, { "epoch": 0.4641968234641073, "grad_norm": 1.9710988998413086, "learning_rate": 0.00011637906677403859, "loss": 1.3826, "step": 12962 }, { "epoch": 0.46423263559367556, "grad_norm": 1.3079078197479248, "learning_rate": 0.00011636762429425407, "loss": 1.1247, "step": 12963 }, { "epoch": 0.4642684477232439, "grad_norm": 1.8297098875045776, "learning_rate": 0.0001163561815942643, "loss": 1.5155, "step": 12964 }, { "epoch": 0.46430425985281215, "grad_norm": 1.8603323698043823, "learning_rate": 0.00011634473867422322, "loss": 1.4551, "step": 12965 }, { "epoch": 0.4643400719823804, "grad_norm": 1.6557583808898926, "learning_rate": 0.00011633329553428476, "loss": 1.29, "step": 12966 }, { "epoch": 0.46437588411194874, "grad_norm": 1.406653881072998, "learning_rate": 0.00011632185217460283, "loss": 1.5551, "step": 12967 }, { "epoch": 0.464411696241517, "grad_norm": 1.6777414083480835, "learning_rate": 0.00011631040859533148, "loss": 1.3424, "step": 12968 }, { "epoch": 0.4644475083710853, "grad_norm": 2.046297311782837, "learning_rate": 0.00011629896479662461, "loss": 1.5278, "step": 12969 }, { "epoch": 0.46448332050065355, "grad_norm": 1.4424058198928833, "learning_rate": 0.0001162875207786362, "loss": 1.3149, "step": 12970 }, { "epoch": 0.4645191326302219, "grad_norm": 1.1536939144134521, "learning_rate": 0.00011627607654152022, "loss": 1.4943, "step": 12971 }, { "epoch": 0.46455494475979014, "grad_norm": 1.738486409187317, "learning_rate": 0.0001162646320854306, "loss": 1.3925, "step": 12972 }, { "epoch": 0.4645907568893584, "grad_norm": 2.092022657394409, "learning_rate": 0.00011625318741052133, "loss": 1.7829, "step": 12973 }, { "epoch": 0.4646265690189267, "grad_norm": 3.35194993019104, "learning_rate": 0.0001162417425169464, "loss": 1.3859, "step": 12974 }, { "epoch": 0.464662381148495, "grad_norm": 1.7595601081848145, "learning_rate": 0.00011623029740485978, "loss": 1.4044, "step": 12975 }, { "epoch": 0.4646981932780633, "grad_norm": 1.210787057876587, "learning_rate": 0.00011621885207441541, "loss": 1.5113, "step": 12976 }, { "epoch": 0.46473400540763155, "grad_norm": 1.481057047843933, "learning_rate": 0.00011620740652576736, "loss": 1.1624, "step": 12977 }, { "epoch": 0.46476981753719987, "grad_norm": 2.3686330318450928, "learning_rate": 0.0001161959607590695, "loss": 1.4887, "step": 12978 }, { "epoch": 0.46480562966676814, "grad_norm": 2.417525053024292, "learning_rate": 0.00011618451477447596, "loss": 1.5583, "step": 12979 }, { "epoch": 0.4648414417963364, "grad_norm": 2.8242523670196533, "learning_rate": 0.00011617306857214059, "loss": 1.6658, "step": 12980 }, { "epoch": 0.4648772539259047, "grad_norm": 2.044356107711792, "learning_rate": 0.00011616162215221744, "loss": 1.3755, "step": 12981 }, { "epoch": 0.464913066055473, "grad_norm": 1.7669847011566162, "learning_rate": 0.00011615017551486054, "loss": 1.6859, "step": 12982 }, { "epoch": 0.46494887818504127, "grad_norm": 1.8860310316085815, "learning_rate": 0.00011613872866022384, "loss": 1.4955, "step": 12983 }, { "epoch": 0.46498469031460954, "grad_norm": 1.8935774564743042, "learning_rate": 0.00011612728158846138, "loss": 1.7469, "step": 12984 }, { "epoch": 0.46502050244417786, "grad_norm": 1.7466204166412354, "learning_rate": 0.00011611583429972715, "loss": 1.3623, "step": 12985 }, { "epoch": 0.46505631457374613, "grad_norm": 2.1702635288238525, "learning_rate": 0.00011610438679417515, "loss": 1.4292, "step": 12986 }, { "epoch": 0.4650921267033144, "grad_norm": 1.6326416730880737, "learning_rate": 0.0001160929390719594, "loss": 1.5971, "step": 12987 }, { "epoch": 0.46512793883288267, "grad_norm": 3.18381667137146, "learning_rate": 0.00011608149113323392, "loss": 1.5523, "step": 12988 }, { "epoch": 0.465163750962451, "grad_norm": 2.2429869174957275, "learning_rate": 0.00011607004297815271, "loss": 1.2013, "step": 12989 }, { "epoch": 0.46519956309201926, "grad_norm": 1.3546289205551147, "learning_rate": 0.00011605859460686981, "loss": 1.4236, "step": 12990 }, { "epoch": 0.46523537522158753, "grad_norm": 1.5490727424621582, "learning_rate": 0.00011604714601953922, "loss": 1.567, "step": 12991 }, { "epoch": 0.46527118735115586, "grad_norm": 1.2240389585494995, "learning_rate": 0.00011603569721631499, "loss": 1.0347, "step": 12992 }, { "epoch": 0.4653069994807241, "grad_norm": 1.6459600925445557, "learning_rate": 0.00011602424819735111, "loss": 1.7073, "step": 12993 }, { "epoch": 0.4653428116102924, "grad_norm": 1.6901960372924805, "learning_rate": 0.00011601279896280167, "loss": 1.72, "step": 12994 }, { "epoch": 0.46537862373986066, "grad_norm": 1.3724249601364136, "learning_rate": 0.00011600134951282067, "loss": 1.6381, "step": 12995 }, { "epoch": 0.465414435869429, "grad_norm": 2.0271923542022705, "learning_rate": 0.00011598989984756216, "loss": 1.5456, "step": 12996 }, { "epoch": 0.46545024799899726, "grad_norm": 1.7223308086395264, "learning_rate": 0.0001159784499671802, "loss": 1.9994, "step": 12997 }, { "epoch": 0.4654860601285655, "grad_norm": 1.5626134872436523, "learning_rate": 0.00011596699987182873, "loss": 1.4624, "step": 12998 }, { "epoch": 0.46552187225813385, "grad_norm": 1.7725961208343506, "learning_rate": 0.00011595554956166195, "loss": 1.4648, "step": 12999 }, { "epoch": 0.4655576843877021, "grad_norm": 1.5563966035842896, "learning_rate": 0.00011594409903683376, "loss": 1.4602, "step": 13000 }, { "epoch": 0.4655934965172704, "grad_norm": 1.607021450996399, "learning_rate": 0.00011593264829749835, "loss": 1.4378, "step": 13001 }, { "epoch": 0.46562930864683866, "grad_norm": 1.4815516471862793, "learning_rate": 0.00011592119734380966, "loss": 1.705, "step": 13002 }, { "epoch": 0.465665120776407, "grad_norm": 1.9756717681884766, "learning_rate": 0.00011590974617592182, "loss": 1.6251, "step": 13003 }, { "epoch": 0.46570093290597525, "grad_norm": 1.837310791015625, "learning_rate": 0.00011589829479398886, "loss": 1.8303, "step": 13004 }, { "epoch": 0.4657367450355435, "grad_norm": 2.223160982131958, "learning_rate": 0.00011588684319816485, "loss": 1.3854, "step": 13005 }, { "epoch": 0.46577255716511184, "grad_norm": 1.3153750896453857, "learning_rate": 0.00011587539138860388, "loss": 1.4643, "step": 13006 }, { "epoch": 0.4658083692946801, "grad_norm": 1.9978464841842651, "learning_rate": 0.00011586393936545995, "loss": 1.6035, "step": 13007 }, { "epoch": 0.4658441814242484, "grad_norm": 1.5772230625152588, "learning_rate": 0.00011585248712888724, "loss": 1.7976, "step": 13008 }, { "epoch": 0.46587999355381665, "grad_norm": 1.839242935180664, "learning_rate": 0.0001158410346790397, "loss": 1.4875, "step": 13009 }, { "epoch": 0.465915805683385, "grad_norm": 1.7222756147384644, "learning_rate": 0.00011582958201607152, "loss": 1.562, "step": 13010 }, { "epoch": 0.46595161781295324, "grad_norm": 1.490675687789917, "learning_rate": 0.0001158181291401367, "loss": 1.5901, "step": 13011 }, { "epoch": 0.4659874299425215, "grad_norm": 1.3239779472351074, "learning_rate": 0.00011580667605138937, "loss": 1.3461, "step": 13012 }, { "epoch": 0.46602324207208984, "grad_norm": 1.3944867849349976, "learning_rate": 0.0001157952227499836, "loss": 1.8864, "step": 13013 }, { "epoch": 0.4660590542016581, "grad_norm": 1.853308916091919, "learning_rate": 0.0001157837692360735, "loss": 1.388, "step": 13014 }, { "epoch": 0.4660948663312264, "grad_norm": 1.9267886877059937, "learning_rate": 0.00011577231550981313, "loss": 1.7276, "step": 13015 }, { "epoch": 0.46613067846079465, "grad_norm": 1.8433860540390015, "learning_rate": 0.00011576086157135659, "loss": 1.4372, "step": 13016 }, { "epoch": 0.46616649059036297, "grad_norm": 1.6603800058364868, "learning_rate": 0.00011574940742085803, "loss": 1.5427, "step": 13017 }, { "epoch": 0.46620230271993124, "grad_norm": 1.883219838142395, "learning_rate": 0.00011573795305847146, "loss": 1.6654, "step": 13018 }, { "epoch": 0.4662381148494995, "grad_norm": 1.6327214241027832, "learning_rate": 0.00011572649848435104, "loss": 1.7289, "step": 13019 }, { "epoch": 0.46627392697906783, "grad_norm": 1.6758432388305664, "learning_rate": 0.00011571504369865087, "loss": 1.3519, "step": 13020 }, { "epoch": 0.4663097391086361, "grad_norm": 2.6810929775238037, "learning_rate": 0.0001157035887015251, "loss": 1.2821, "step": 13021 }, { "epoch": 0.46634555123820437, "grad_norm": 1.7383793592453003, "learning_rate": 0.00011569213349312773, "loss": 2.0079, "step": 13022 }, { "epoch": 0.46638136336777264, "grad_norm": 2.8930468559265137, "learning_rate": 0.000115680678073613, "loss": 1.5185, "step": 13023 }, { "epoch": 0.46641717549734096, "grad_norm": 1.9949698448181152, "learning_rate": 0.00011566922244313496, "loss": 1.3717, "step": 13024 }, { "epoch": 0.46645298762690923, "grad_norm": 1.4897794723510742, "learning_rate": 0.00011565776660184772, "loss": 1.3828, "step": 13025 }, { "epoch": 0.4664887997564775, "grad_norm": 1.995154619216919, "learning_rate": 0.00011564631054990546, "loss": 1.618, "step": 13026 }, { "epoch": 0.4665246118860458, "grad_norm": 1.3552685976028442, "learning_rate": 0.00011563485428746226, "loss": 1.4857, "step": 13027 }, { "epoch": 0.4665604240156141, "grad_norm": 1.694174885749817, "learning_rate": 0.00011562339781467226, "loss": 1.5152, "step": 13028 }, { "epoch": 0.46659623614518236, "grad_norm": 2.1866040229797363, "learning_rate": 0.00011561194113168958, "loss": 1.5322, "step": 13029 }, { "epoch": 0.46663204827475063, "grad_norm": 1.6240884065628052, "learning_rate": 0.0001156004842386684, "loss": 1.6096, "step": 13030 }, { "epoch": 0.46666786040431896, "grad_norm": 1.6429814100265503, "learning_rate": 0.0001155890271357628, "loss": 1.136, "step": 13031 }, { "epoch": 0.4667036725338872, "grad_norm": 1.7728595733642578, "learning_rate": 0.00011557756982312699, "loss": 1.627, "step": 13032 }, { "epoch": 0.4667394846634555, "grad_norm": 1.5901148319244385, "learning_rate": 0.00011556611230091502, "loss": 1.7406, "step": 13033 }, { "epoch": 0.4667752967930238, "grad_norm": 1.8100167512893677, "learning_rate": 0.00011555465456928114, "loss": 1.3819, "step": 13034 }, { "epoch": 0.4668111089225921, "grad_norm": 1.715928316116333, "learning_rate": 0.0001155431966283794, "loss": 1.0788, "step": 13035 }, { "epoch": 0.46684692105216036, "grad_norm": 1.3283747434616089, "learning_rate": 0.00011553173847836403, "loss": 1.3998, "step": 13036 }, { "epoch": 0.4668827331817286, "grad_norm": 2.5385360717773438, "learning_rate": 0.00011552028011938913, "loss": 1.2955, "step": 13037 }, { "epoch": 0.46691854531129695, "grad_norm": 1.693988561630249, "learning_rate": 0.0001155088215516089, "loss": 1.2398, "step": 13038 }, { "epoch": 0.4669543574408652, "grad_norm": 1.468666911125183, "learning_rate": 0.00011549736277517746, "loss": 1.5346, "step": 13039 }, { "epoch": 0.4669901695704335, "grad_norm": 1.6999561786651611, "learning_rate": 0.00011548590379024904, "loss": 1.4972, "step": 13040 }, { "epoch": 0.4670259817000018, "grad_norm": 2.801893711090088, "learning_rate": 0.00011547444459697772, "loss": 1.5316, "step": 13041 }, { "epoch": 0.4670617938295701, "grad_norm": 1.450239658355713, "learning_rate": 0.00011546298519551771, "loss": 1.4053, "step": 13042 }, { "epoch": 0.46709760595913835, "grad_norm": 1.53555428981781, "learning_rate": 0.00011545152558602319, "loss": 1.6854, "step": 13043 }, { "epoch": 0.4671334180887066, "grad_norm": 1.7407196760177612, "learning_rate": 0.00011544006576864832, "loss": 1.4238, "step": 13044 }, { "epoch": 0.46716923021827494, "grad_norm": 1.4221725463867188, "learning_rate": 0.00011542860574354727, "loss": 1.6033, "step": 13045 }, { "epoch": 0.4672050423478432, "grad_norm": 1.597596526145935, "learning_rate": 0.00011541714551087423, "loss": 1.5698, "step": 13046 }, { "epoch": 0.4672408544774115, "grad_norm": 1.6821784973144531, "learning_rate": 0.00011540568507078342, "loss": 1.4342, "step": 13047 }, { "epoch": 0.4672766666069798, "grad_norm": 1.4949378967285156, "learning_rate": 0.00011539422442342895, "loss": 1.3515, "step": 13048 }, { "epoch": 0.4673124787365481, "grad_norm": 1.5018175840377808, "learning_rate": 0.00011538276356896507, "loss": 1.4951, "step": 13049 }, { "epoch": 0.46734829086611634, "grad_norm": 1.6001633405685425, "learning_rate": 0.00011537130250754595, "loss": 1.5004, "step": 13050 }, { "epoch": 0.4673841029956846, "grad_norm": 1.7056968212127686, "learning_rate": 0.00011535984123932578, "loss": 1.2113, "step": 13051 }, { "epoch": 0.46741991512525294, "grad_norm": 1.4191452264785767, "learning_rate": 0.00011534837976445875, "loss": 1.1832, "step": 13052 }, { "epoch": 0.4674557272548212, "grad_norm": 1.751197338104248, "learning_rate": 0.00011533691808309905, "loss": 1.8554, "step": 13053 }, { "epoch": 0.4674915393843895, "grad_norm": 1.9440951347351074, "learning_rate": 0.00011532545619540094, "loss": 1.2174, "step": 13054 }, { "epoch": 0.4675273515139578, "grad_norm": 1.4406771659851074, "learning_rate": 0.00011531399410151855, "loss": 1.505, "step": 13055 }, { "epoch": 0.46756316364352607, "grad_norm": 1.4604896306991577, "learning_rate": 0.00011530253180160614, "loss": 1.5097, "step": 13056 }, { "epoch": 0.46759897577309434, "grad_norm": 1.3812698125839233, "learning_rate": 0.00011529106929581792, "loss": 0.9882, "step": 13057 }, { "epoch": 0.4676347879026626, "grad_norm": 1.4525638818740845, "learning_rate": 0.00011527960658430807, "loss": 1.4344, "step": 13058 }, { "epoch": 0.46767060003223093, "grad_norm": 1.5971357822418213, "learning_rate": 0.00011526814366723084, "loss": 1.4928, "step": 13059 }, { "epoch": 0.4677064121617992, "grad_norm": 2.0160109996795654, "learning_rate": 0.00011525668054474039, "loss": 1.2046, "step": 13060 }, { "epoch": 0.46774222429136747, "grad_norm": 1.648202657699585, "learning_rate": 0.00011524521721699102, "loss": 1.3332, "step": 13061 }, { "epoch": 0.4677780364209358, "grad_norm": 1.7077761888504028, "learning_rate": 0.0001152337536841369, "loss": 1.5201, "step": 13062 }, { "epoch": 0.46781384855050406, "grad_norm": 2.0169153213500977, "learning_rate": 0.00011522228994633229, "loss": 1.6287, "step": 13063 }, { "epoch": 0.46784966068007233, "grad_norm": 1.86139976978302, "learning_rate": 0.00011521082600373136, "loss": 1.4691, "step": 13064 }, { "epoch": 0.4678854728096406, "grad_norm": 1.6686205863952637, "learning_rate": 0.00011519936185648842, "loss": 1.3001, "step": 13065 }, { "epoch": 0.4679212849392089, "grad_norm": 1.5188180208206177, "learning_rate": 0.0001151878975047577, "loss": 1.4057, "step": 13066 }, { "epoch": 0.4679570970687772, "grad_norm": 1.5588150024414062, "learning_rate": 0.00011517643294869339, "loss": 1.4067, "step": 13067 }, { "epoch": 0.46799290919834546, "grad_norm": 1.6581311225891113, "learning_rate": 0.00011516496818844972, "loss": 1.6517, "step": 13068 }, { "epoch": 0.4680287213279138, "grad_norm": 1.5204647779464722, "learning_rate": 0.000115153503224181, "loss": 1.6362, "step": 13069 }, { "epoch": 0.46806453345748206, "grad_norm": 1.700719952583313, "learning_rate": 0.00011514203805604142, "loss": 1.2189, "step": 13070 }, { "epoch": 0.4681003455870503, "grad_norm": 2.6246724128723145, "learning_rate": 0.00011513057268418526, "loss": 1.6423, "step": 13071 }, { "epoch": 0.4681361577166186, "grad_norm": 1.727502465248108, "learning_rate": 0.00011511910710876677, "loss": 1.7205, "step": 13072 }, { "epoch": 0.4681719698461869, "grad_norm": 1.5376635789871216, "learning_rate": 0.00011510764132994016, "loss": 1.5014, "step": 13073 }, { "epoch": 0.4682077819757552, "grad_norm": 1.3821709156036377, "learning_rate": 0.00011509617534785976, "loss": 1.5177, "step": 13074 }, { "epoch": 0.46824359410532346, "grad_norm": 1.698994755744934, "learning_rate": 0.00011508470916267978, "loss": 1.4925, "step": 13075 }, { "epoch": 0.4682794062348918, "grad_norm": 1.7280097007751465, "learning_rate": 0.00011507324277455452, "loss": 1.3431, "step": 13076 }, { "epoch": 0.46831521836446005, "grad_norm": 1.7975046634674072, "learning_rate": 0.00011506177618363818, "loss": 1.5999, "step": 13077 }, { "epoch": 0.4683510304940283, "grad_norm": 1.7372099161148071, "learning_rate": 0.00011505030939008508, "loss": 1.3853, "step": 13078 }, { "epoch": 0.4683868426235966, "grad_norm": 1.693571925163269, "learning_rate": 0.0001150388423940495, "loss": 1.3432, "step": 13079 }, { "epoch": 0.4684226547531649, "grad_norm": 1.4951120615005493, "learning_rate": 0.00011502737519568567, "loss": 1.7497, "step": 13080 }, { "epoch": 0.4684584668827332, "grad_norm": 1.509081482887268, "learning_rate": 0.00011501590779514793, "loss": 1.5924, "step": 13081 }, { "epoch": 0.46849427901230145, "grad_norm": 2.52064847946167, "learning_rate": 0.00011500444019259047, "loss": 1.4573, "step": 13082 }, { "epoch": 0.4685300911418698, "grad_norm": 1.4621198177337646, "learning_rate": 0.00011499297238816767, "loss": 1.5936, "step": 13083 }, { "epoch": 0.46856590327143804, "grad_norm": 1.402459979057312, "learning_rate": 0.00011498150438203373, "loss": 1.7086, "step": 13084 }, { "epoch": 0.4686017154010063, "grad_norm": 1.5725468397140503, "learning_rate": 0.00011497003617434301, "loss": 1.4823, "step": 13085 }, { "epoch": 0.4686375275305746, "grad_norm": 2.153813123703003, "learning_rate": 0.00011495856776524971, "loss": 1.2271, "step": 13086 }, { "epoch": 0.4686733396601429, "grad_norm": 1.4646168947219849, "learning_rate": 0.00011494709915490822, "loss": 1.4015, "step": 13087 }, { "epoch": 0.4687091517897112, "grad_norm": 1.6754770278930664, "learning_rate": 0.00011493563034347277, "loss": 1.5845, "step": 13088 }, { "epoch": 0.46874496391927944, "grad_norm": 1.6596485376358032, "learning_rate": 0.00011492416133109769, "loss": 1.3597, "step": 13089 }, { "epoch": 0.46878077604884777, "grad_norm": 1.3753113746643066, "learning_rate": 0.00011491269211793725, "loss": 1.4336, "step": 13090 }, { "epoch": 0.46881658817841604, "grad_norm": 1.4884370565414429, "learning_rate": 0.00011490122270414578, "loss": 1.3863, "step": 13091 }, { "epoch": 0.4688524003079843, "grad_norm": 2.0943214893341064, "learning_rate": 0.0001148897530898776, "loss": 1.1964, "step": 13092 }, { "epoch": 0.4688882124375526, "grad_norm": 1.4185950756072998, "learning_rate": 0.000114878283275287, "loss": 1.3801, "step": 13093 }, { "epoch": 0.4689240245671209, "grad_norm": 1.359785795211792, "learning_rate": 0.00011486681326052828, "loss": 1.501, "step": 13094 }, { "epoch": 0.46895983669668917, "grad_norm": 1.7992392778396606, "learning_rate": 0.00011485534304575575, "loss": 1.6647, "step": 13095 }, { "epoch": 0.46899564882625744, "grad_norm": 1.468299150466919, "learning_rate": 0.00011484387263112377, "loss": 1.3893, "step": 13096 }, { "epoch": 0.46903146095582576, "grad_norm": 1.3851277828216553, "learning_rate": 0.0001148324020167866, "loss": 1.0484, "step": 13097 }, { "epoch": 0.46906727308539403, "grad_norm": 2.560601234436035, "learning_rate": 0.0001148209312028986, "loss": 1.1958, "step": 13098 }, { "epoch": 0.4691030852149623, "grad_norm": 1.7320241928100586, "learning_rate": 0.0001148094601896141, "loss": 1.3779, "step": 13099 }, { "epoch": 0.46913889734453057, "grad_norm": 2.7635598182678223, "learning_rate": 0.00011479798897708742, "loss": 1.9673, "step": 13100 }, { "epoch": 0.4691747094740989, "grad_norm": 2.2257347106933594, "learning_rate": 0.00011478651756547287, "loss": 1.7453, "step": 13101 }, { "epoch": 0.46921052160366716, "grad_norm": 1.73111891746521, "learning_rate": 0.00011477504595492481, "loss": 1.1503, "step": 13102 }, { "epoch": 0.46924633373323543, "grad_norm": 1.4754983186721802, "learning_rate": 0.00011476357414559757, "loss": 1.5525, "step": 13103 }, { "epoch": 0.46928214586280376, "grad_norm": 3.5211539268493652, "learning_rate": 0.00011475210213764547, "loss": 1.7384, "step": 13104 }, { "epoch": 0.469317957992372, "grad_norm": 1.7858822345733643, "learning_rate": 0.00011474062993122288, "loss": 1.4745, "step": 13105 }, { "epoch": 0.4693537701219403, "grad_norm": 1.6460684537887573, "learning_rate": 0.0001147291575264841, "loss": 1.7301, "step": 13106 }, { "epoch": 0.46938958225150856, "grad_norm": 1.3619961738586426, "learning_rate": 0.00011471768492358354, "loss": 1.2949, "step": 13107 }, { "epoch": 0.4694253943810769, "grad_norm": 1.6338173151016235, "learning_rate": 0.00011470621212267547, "loss": 1.3691, "step": 13108 }, { "epoch": 0.46946120651064516, "grad_norm": 1.5736229419708252, "learning_rate": 0.00011469473912391433, "loss": 1.38, "step": 13109 }, { "epoch": 0.4694970186402134, "grad_norm": 1.666130542755127, "learning_rate": 0.0001146832659274544, "loss": 1.5988, "step": 13110 }, { "epoch": 0.46953283076978175, "grad_norm": 2.119744062423706, "learning_rate": 0.00011467179253345008, "loss": 1.6468, "step": 13111 }, { "epoch": 0.46956864289935, "grad_norm": 2.0681087970733643, "learning_rate": 0.00011466031894205574, "loss": 1.404, "step": 13112 }, { "epoch": 0.4696044550289183, "grad_norm": 1.5972648859024048, "learning_rate": 0.00011464884515342568, "loss": 1.4593, "step": 13113 }, { "epoch": 0.46964026715848656, "grad_norm": 1.4360387325286865, "learning_rate": 0.00011463737116771434, "loss": 1.2351, "step": 13114 }, { "epoch": 0.4696760792880549, "grad_norm": 1.9295847415924072, "learning_rate": 0.00011462589698507603, "loss": 1.6692, "step": 13115 }, { "epoch": 0.46971189141762315, "grad_norm": 1.6461082696914673, "learning_rate": 0.00011461442260566513, "loss": 1.2059, "step": 13116 }, { "epoch": 0.4697477035471914, "grad_norm": 2.095632791519165, "learning_rate": 0.00011460294802963602, "loss": 1.2712, "step": 13117 }, { "epoch": 0.46978351567675974, "grad_norm": 2.0552680492401123, "learning_rate": 0.00011459147325714312, "loss": 1.4689, "step": 13118 }, { "epoch": 0.469819327806328, "grad_norm": 1.3072434663772583, "learning_rate": 0.00011457999828834073, "loss": 1.5075, "step": 13119 }, { "epoch": 0.4698551399358963, "grad_norm": 2.8187243938446045, "learning_rate": 0.00011456852312338331, "loss": 1.3692, "step": 13120 }, { "epoch": 0.46989095206546455, "grad_norm": 1.7331218719482422, "learning_rate": 0.00011455704776242517, "loss": 1.6142, "step": 13121 }, { "epoch": 0.4699267641950329, "grad_norm": 1.6014031171798706, "learning_rate": 0.00011454557220562074, "loss": 1.3266, "step": 13122 }, { "epoch": 0.46996257632460114, "grad_norm": 1.5360310077667236, "learning_rate": 0.0001145340964531244, "loss": 1.4116, "step": 13123 }, { "epoch": 0.4699983884541694, "grad_norm": 1.7113350629806519, "learning_rate": 0.00011452262050509053, "loss": 1.3728, "step": 13124 }, { "epoch": 0.47003420058373774, "grad_norm": 2.2795071601867676, "learning_rate": 0.00011451114436167356, "loss": 1.5459, "step": 13125 }, { "epoch": 0.470070012713306, "grad_norm": 1.575621247291565, "learning_rate": 0.00011449966802302783, "loss": 1.7569, "step": 13126 }, { "epoch": 0.4701058248428743, "grad_norm": 2.293635129928589, "learning_rate": 0.0001144881914893078, "loss": 1.1277, "step": 13127 }, { "epoch": 0.47014163697244254, "grad_norm": 1.592656135559082, "learning_rate": 0.00011447671476066781, "loss": 1.4337, "step": 13128 }, { "epoch": 0.47017744910201087, "grad_norm": 1.407986044883728, "learning_rate": 0.00011446523783726235, "loss": 1.4221, "step": 13129 }, { "epoch": 0.47021326123157914, "grad_norm": 1.9266597032546997, "learning_rate": 0.00011445376071924572, "loss": 1.4518, "step": 13130 }, { "epoch": 0.4702490733611474, "grad_norm": 1.3258838653564453, "learning_rate": 0.00011444228340677241, "loss": 1.705, "step": 13131 }, { "epoch": 0.47028488549071573, "grad_norm": 1.457871675491333, "learning_rate": 0.00011443080589999677, "loss": 1.695, "step": 13132 }, { "epoch": 0.470320697620284, "grad_norm": 1.8480134010314941, "learning_rate": 0.00011441932819907328, "loss": 1.5101, "step": 13133 }, { "epoch": 0.47035650974985227, "grad_norm": 2.691573143005371, "learning_rate": 0.00011440785030415633, "loss": 1.2265, "step": 13134 }, { "epoch": 0.47039232187942054, "grad_norm": 1.8243192434310913, "learning_rate": 0.00011439637221540031, "loss": 1.8265, "step": 13135 }, { "epoch": 0.47042813400898886, "grad_norm": 1.973116397857666, "learning_rate": 0.00011438489393295973, "loss": 1.5911, "step": 13136 }, { "epoch": 0.47046394613855713, "grad_norm": 1.8532606363296509, "learning_rate": 0.00011437341545698892, "loss": 1.4446, "step": 13137 }, { "epoch": 0.4704997582681254, "grad_norm": 1.6671091318130493, "learning_rate": 0.00011436193678764236, "loss": 1.3737, "step": 13138 }, { "epoch": 0.4705355703976937, "grad_norm": 1.4185718297958374, "learning_rate": 0.00011435045792507443, "loss": 1.4509, "step": 13139 }, { "epoch": 0.470571382527262, "grad_norm": 1.73945152759552, "learning_rate": 0.00011433897886943965, "loss": 1.8183, "step": 13140 }, { "epoch": 0.47060719465683026, "grad_norm": 2.0083441734313965, "learning_rate": 0.00011432749962089235, "loss": 1.1479, "step": 13141 }, { "epoch": 0.47064300678639853, "grad_norm": 2.459001064300537, "learning_rate": 0.00011431602017958707, "loss": 1.3052, "step": 13142 }, { "epoch": 0.47067881891596686, "grad_norm": 2.419342041015625, "learning_rate": 0.00011430454054567819, "loss": 1.6516, "step": 13143 }, { "epoch": 0.4707146310455351, "grad_norm": 1.7359721660614014, "learning_rate": 0.00011429306071932018, "loss": 1.2583, "step": 13144 }, { "epoch": 0.4707504431751034, "grad_norm": 1.5329176187515259, "learning_rate": 0.00011428158070066743, "loss": 1.6391, "step": 13145 }, { "epoch": 0.4707862553046717, "grad_norm": 2.4268431663513184, "learning_rate": 0.00011427010048987448, "loss": 1.8331, "step": 13146 }, { "epoch": 0.47082206743424, "grad_norm": 2.1451423168182373, "learning_rate": 0.00011425862008709574, "loss": 1.4687, "step": 13147 }, { "epoch": 0.47085787956380826, "grad_norm": 1.776679515838623, "learning_rate": 0.00011424713949248562, "loss": 1.4487, "step": 13148 }, { "epoch": 0.4708936916933765, "grad_norm": 1.735393762588501, "learning_rate": 0.00011423565870619863, "loss": 1.439, "step": 13149 }, { "epoch": 0.47092950382294485, "grad_norm": 1.2827190160751343, "learning_rate": 0.00011422417772838923, "loss": 1.2839, "step": 13150 }, { "epoch": 0.4709653159525131, "grad_norm": 1.7964860200881958, "learning_rate": 0.00011421269655921185, "loss": 1.345, "step": 13151 }, { "epoch": 0.4710011280820814, "grad_norm": 1.468343734741211, "learning_rate": 0.00011420121519882096, "loss": 1.4463, "step": 13152 }, { "epoch": 0.4710369402116497, "grad_norm": 1.5348808765411377, "learning_rate": 0.00011418973364737107, "loss": 1.5609, "step": 13153 }, { "epoch": 0.471072752341218, "grad_norm": 1.6379225254058838, "learning_rate": 0.00011417825190501658, "loss": 1.3981, "step": 13154 }, { "epoch": 0.47110856447078625, "grad_norm": 1.5981770753860474, "learning_rate": 0.00011416676997191205, "loss": 1.5883, "step": 13155 }, { "epoch": 0.4711443766003545, "grad_norm": 1.8267465829849243, "learning_rate": 0.00011415528784821188, "loss": 1.6164, "step": 13156 }, { "epoch": 0.47118018872992284, "grad_norm": 1.5526596307754517, "learning_rate": 0.00011414380553407055, "loss": 1.382, "step": 13157 }, { "epoch": 0.4712160008594911, "grad_norm": 2.0587666034698486, "learning_rate": 0.00011413232302964258, "loss": 1.7454, "step": 13158 }, { "epoch": 0.4712518129890594, "grad_norm": 1.841937780380249, "learning_rate": 0.00011412084033508242, "loss": 1.4733, "step": 13159 }, { "epoch": 0.4712876251186277, "grad_norm": 2.0795516967773438, "learning_rate": 0.00011410935745054459, "loss": 1.391, "step": 13160 }, { "epoch": 0.471323437248196, "grad_norm": 1.4707566499710083, "learning_rate": 0.00011409787437618353, "loss": 1.6325, "step": 13161 }, { "epoch": 0.47135924937776424, "grad_norm": 2.5612967014312744, "learning_rate": 0.00011408639111215378, "loss": 1.6126, "step": 13162 }, { "epoch": 0.4713950615073325, "grad_norm": 1.5790444612503052, "learning_rate": 0.00011407490765860978, "loss": 1.395, "step": 13163 }, { "epoch": 0.47143087363690084, "grad_norm": 1.65281081199646, "learning_rate": 0.00011406342401570609, "loss": 1.5043, "step": 13164 }, { "epoch": 0.4714666857664691, "grad_norm": 1.3939710855484009, "learning_rate": 0.00011405194018359715, "loss": 1.4143, "step": 13165 }, { "epoch": 0.4715024978960374, "grad_norm": 1.7264175415039062, "learning_rate": 0.00011404045616243745, "loss": 1.6513, "step": 13166 }, { "epoch": 0.4715383100256057, "grad_norm": 1.5612338781356812, "learning_rate": 0.00011402897195238158, "loss": 1.8289, "step": 13167 }, { "epoch": 0.47157412215517397, "grad_norm": 1.8469725847244263, "learning_rate": 0.00011401748755358395, "loss": 1.3954, "step": 13168 }, { "epoch": 0.47160993428474224, "grad_norm": 1.5684376955032349, "learning_rate": 0.00011400600296619912, "loss": 1.5721, "step": 13169 }, { "epoch": 0.4716457464143105, "grad_norm": 1.6736838817596436, "learning_rate": 0.00011399451819038159, "loss": 1.3776, "step": 13170 }, { "epoch": 0.47168155854387883, "grad_norm": 1.5765982866287231, "learning_rate": 0.00011398303322628585, "loss": 1.5997, "step": 13171 }, { "epoch": 0.4717173706734471, "grad_norm": 1.5754213333129883, "learning_rate": 0.00011397154807406645, "loss": 1.3318, "step": 13172 }, { "epoch": 0.47175318280301537, "grad_norm": 1.5405632257461548, "learning_rate": 0.00011396006273387792, "loss": 1.2578, "step": 13173 }, { "epoch": 0.47178899493258364, "grad_norm": 1.6634725332260132, "learning_rate": 0.0001139485772058747, "loss": 1.7118, "step": 13174 }, { "epoch": 0.47182480706215196, "grad_norm": 1.8009767532348633, "learning_rate": 0.0001139370914902114, "loss": 1.5235, "step": 13175 }, { "epoch": 0.47186061919172023, "grad_norm": 2.3448922634124756, "learning_rate": 0.00011392560558704249, "loss": 1.4457, "step": 13176 }, { "epoch": 0.4718964313212885, "grad_norm": 1.6356074810028076, "learning_rate": 0.00011391411949652253, "loss": 1.4746, "step": 13177 }, { "epoch": 0.4719322434508568, "grad_norm": 2.3725569248199463, "learning_rate": 0.00011390263321880605, "loss": 1.4069, "step": 13178 }, { "epoch": 0.4719680555804251, "grad_norm": 1.9027031660079956, "learning_rate": 0.00011389114675404755, "loss": 1.6479, "step": 13179 }, { "epoch": 0.47200386770999336, "grad_norm": 1.7592540979385376, "learning_rate": 0.00011387966010240161, "loss": 1.5657, "step": 13180 }, { "epoch": 0.47203967983956163, "grad_norm": 3.3529903888702393, "learning_rate": 0.00011386817326402273, "loss": 1.5566, "step": 13181 }, { "epoch": 0.47207549196912996, "grad_norm": 2.228374481201172, "learning_rate": 0.00011385668623906551, "loss": 1.553, "step": 13182 }, { "epoch": 0.4721113040986982, "grad_norm": 1.72072434425354, "learning_rate": 0.00011384519902768441, "loss": 1.503, "step": 13183 }, { "epoch": 0.4721471162282665, "grad_norm": 1.6252473592758179, "learning_rate": 0.00011383371163003403, "loss": 1.2247, "step": 13184 }, { "epoch": 0.4721829283578348, "grad_norm": 2.5225577354431152, "learning_rate": 0.00011382222404626888, "loss": 1.4804, "step": 13185 }, { "epoch": 0.4722187404874031, "grad_norm": 1.921576738357544, "learning_rate": 0.00011381073627654357, "loss": 1.7643, "step": 13186 }, { "epoch": 0.47225455261697136, "grad_norm": 1.2695116996765137, "learning_rate": 0.00011379924832101258, "loss": 1.6016, "step": 13187 }, { "epoch": 0.4722903647465396, "grad_norm": 1.5697298049926758, "learning_rate": 0.00011378776017983053, "loss": 1.5873, "step": 13188 }, { "epoch": 0.47232617687610795, "grad_norm": 1.467513084411621, "learning_rate": 0.00011377627185315194, "loss": 1.318, "step": 13189 }, { "epoch": 0.4723619890056762, "grad_norm": 1.839012861251831, "learning_rate": 0.00011376478334113139, "loss": 1.3583, "step": 13190 }, { "epoch": 0.4723978011352445, "grad_norm": 1.906954050064087, "learning_rate": 0.00011375329464392343, "loss": 1.7615, "step": 13191 }, { "epoch": 0.4724336132648128, "grad_norm": 2.299314022064209, "learning_rate": 0.00011374180576168263, "loss": 1.3546, "step": 13192 }, { "epoch": 0.4724694253943811, "grad_norm": 1.6634674072265625, "learning_rate": 0.00011373031669456358, "loss": 1.3662, "step": 13193 }, { "epoch": 0.47250523752394935, "grad_norm": 1.2625876665115356, "learning_rate": 0.0001137188274427208, "loss": 1.4021, "step": 13194 }, { "epoch": 0.4725410496535176, "grad_norm": 1.9035825729370117, "learning_rate": 0.00011370733800630892, "loss": 1.4559, "step": 13195 }, { "epoch": 0.47257686178308594, "grad_norm": 1.5406285524368286, "learning_rate": 0.00011369584838548246, "loss": 1.3728, "step": 13196 }, { "epoch": 0.4726126739126542, "grad_norm": 1.461430311203003, "learning_rate": 0.00011368435858039605, "loss": 1.3152, "step": 13197 }, { "epoch": 0.4726484860422225, "grad_norm": 1.4157794713974, "learning_rate": 0.00011367286859120423, "loss": 1.328, "step": 13198 }, { "epoch": 0.4726842981717908, "grad_norm": 1.5689760446548462, "learning_rate": 0.00011366137841806161, "loss": 1.5654, "step": 13199 }, { "epoch": 0.4727201103013591, "grad_norm": 1.5379902124404907, "learning_rate": 0.00011364988806112278, "loss": 1.5379, "step": 13200 }, { "epoch": 0.47275592243092734, "grad_norm": 2.011894941329956, "learning_rate": 0.00011363839752054228, "loss": 1.8056, "step": 13201 }, { "epoch": 0.4727917345604956, "grad_norm": 1.3880537748336792, "learning_rate": 0.00011362690679647477, "loss": 1.5678, "step": 13202 }, { "epoch": 0.47282754669006394, "grad_norm": 1.6055556535720825, "learning_rate": 0.00011361541588907477, "loss": 1.6305, "step": 13203 }, { "epoch": 0.4728633588196322, "grad_norm": 1.4604688882827759, "learning_rate": 0.00011360392479849693, "loss": 1.4214, "step": 13204 }, { "epoch": 0.4728991709492005, "grad_norm": 1.9168239831924438, "learning_rate": 0.00011359243352489581, "loss": 1.7095, "step": 13205 }, { "epoch": 0.4729349830787688, "grad_norm": 1.8139801025390625, "learning_rate": 0.00011358094206842607, "loss": 1.5206, "step": 13206 }, { "epoch": 0.47297079520833707, "grad_norm": 1.9469952583312988, "learning_rate": 0.00011356945042924223, "loss": 1.613, "step": 13207 }, { "epoch": 0.47300660733790534, "grad_norm": 1.7959978580474854, "learning_rate": 0.00011355795860749899, "loss": 1.53, "step": 13208 }, { "epoch": 0.4730424194674736, "grad_norm": 1.6030470132827759, "learning_rate": 0.00011354646660335086, "loss": 1.4548, "step": 13209 }, { "epoch": 0.47307823159704193, "grad_norm": 2.472611904144287, "learning_rate": 0.00011353497441695251, "loss": 1.3722, "step": 13210 }, { "epoch": 0.4731140437266102, "grad_norm": 1.9840824604034424, "learning_rate": 0.00011352348204845853, "loss": 1.2221, "step": 13211 }, { "epoch": 0.47314985585617847, "grad_norm": 1.8577934503555298, "learning_rate": 0.00011351198949802355, "loss": 1.0622, "step": 13212 }, { "epoch": 0.4731856679857468, "grad_norm": 1.9593387842178345, "learning_rate": 0.0001135004967658022, "loss": 1.6045, "step": 13213 }, { "epoch": 0.47322148011531506, "grad_norm": 1.7175911664962769, "learning_rate": 0.00011348900385194903, "loss": 1.5449, "step": 13214 }, { "epoch": 0.47325729224488333, "grad_norm": 1.7575346231460571, "learning_rate": 0.00011347751075661876, "loss": 1.5037, "step": 13215 }, { "epoch": 0.4732931043744516, "grad_norm": 2.017416477203369, "learning_rate": 0.00011346601747996595, "loss": 1.57, "step": 13216 }, { "epoch": 0.4733289165040199, "grad_norm": 1.4869771003723145, "learning_rate": 0.00011345452402214527, "loss": 1.5248, "step": 13217 }, { "epoch": 0.4733647286335882, "grad_norm": 1.63876473903656, "learning_rate": 0.0001134430303833113, "loss": 1.5725, "step": 13218 }, { "epoch": 0.47340054076315646, "grad_norm": 1.4624106884002686, "learning_rate": 0.00011343153656361867, "loss": 1.3269, "step": 13219 }, { "epoch": 0.4734363528927248, "grad_norm": 2.3383195400238037, "learning_rate": 0.00011342004256322208, "loss": 1.415, "step": 13220 }, { "epoch": 0.47347216502229306, "grad_norm": 1.4437799453735352, "learning_rate": 0.00011340854838227611, "loss": 1.6656, "step": 13221 }, { "epoch": 0.4735079771518613, "grad_norm": 1.8068561553955078, "learning_rate": 0.00011339705402093543, "loss": 1.8916, "step": 13222 }, { "epoch": 0.4735437892814296, "grad_norm": 2.249796152114868, "learning_rate": 0.00011338555947935465, "loss": 1.6621, "step": 13223 }, { "epoch": 0.4735796014109979, "grad_norm": 1.4828672409057617, "learning_rate": 0.00011337406475768846, "loss": 1.7506, "step": 13224 }, { "epoch": 0.4736154135405662, "grad_norm": 1.364309310913086, "learning_rate": 0.00011336256985609144, "loss": 1.5618, "step": 13225 }, { "epoch": 0.47365122567013446, "grad_norm": 1.287990689277649, "learning_rate": 0.00011335107477471834, "loss": 1.3136, "step": 13226 }, { "epoch": 0.4736870377997028, "grad_norm": 2.1887640953063965, "learning_rate": 0.00011333957951372372, "loss": 1.2726, "step": 13227 }, { "epoch": 0.47372284992927105, "grad_norm": 1.8509095907211304, "learning_rate": 0.00011332808407326225, "loss": 1.3898, "step": 13228 }, { "epoch": 0.4737586620588393, "grad_norm": 1.4751075506210327, "learning_rate": 0.0001133165884534886, "loss": 1.7061, "step": 13229 }, { "epoch": 0.4737944741884076, "grad_norm": 1.6264978647232056, "learning_rate": 0.00011330509265455745, "loss": 1.5171, "step": 13230 }, { "epoch": 0.4738302863179759, "grad_norm": 1.8143638372421265, "learning_rate": 0.00011329359667662342, "loss": 1.2657, "step": 13231 }, { "epoch": 0.4738660984475442, "grad_norm": 1.415449619293213, "learning_rate": 0.00011328210051984118, "loss": 1.6253, "step": 13232 }, { "epoch": 0.47390191057711245, "grad_norm": 1.2469733953475952, "learning_rate": 0.00011327060418436545, "loss": 1.4911, "step": 13233 }, { "epoch": 0.4739377227066808, "grad_norm": 1.2812349796295166, "learning_rate": 0.00011325910767035086, "loss": 1.2655, "step": 13234 }, { "epoch": 0.47397353483624904, "grad_norm": 1.9594454765319824, "learning_rate": 0.00011324761097795206, "loss": 1.7503, "step": 13235 }, { "epoch": 0.4740093469658173, "grad_norm": 1.5524890422821045, "learning_rate": 0.00011323611410732375, "loss": 1.5084, "step": 13236 }, { "epoch": 0.4740451590953856, "grad_norm": 1.7627413272857666, "learning_rate": 0.0001132246170586206, "loss": 1.5972, "step": 13237 }, { "epoch": 0.4740809712249539, "grad_norm": 1.644841194152832, "learning_rate": 0.00011321311983199727, "loss": 1.7392, "step": 13238 }, { "epoch": 0.4741167833545222, "grad_norm": 1.3779560327529907, "learning_rate": 0.00011320162242760848, "loss": 1.4715, "step": 13239 }, { "epoch": 0.47415259548409044, "grad_norm": 1.7711284160614014, "learning_rate": 0.00011319012484560885, "loss": 1.5618, "step": 13240 }, { "epoch": 0.47418840761365877, "grad_norm": 1.8239142894744873, "learning_rate": 0.00011317862708615314, "loss": 1.6008, "step": 13241 }, { "epoch": 0.47422421974322704, "grad_norm": 1.5546090602874756, "learning_rate": 0.00011316712914939598, "loss": 1.5524, "step": 13242 }, { "epoch": 0.4742600318727953, "grad_norm": 1.3858740329742432, "learning_rate": 0.00011315563103549211, "loss": 1.0793, "step": 13243 }, { "epoch": 0.4742958440023636, "grad_norm": 2.5385050773620605, "learning_rate": 0.00011314413274459618, "loss": 1.5313, "step": 13244 }, { "epoch": 0.4743316561319319, "grad_norm": 2.2116689682006836, "learning_rate": 0.0001131326342768629, "loss": 1.6439, "step": 13245 }, { "epoch": 0.47436746826150017, "grad_norm": 1.4536186456680298, "learning_rate": 0.00011312113563244695, "loss": 1.6257, "step": 13246 }, { "epoch": 0.47440328039106844, "grad_norm": 2.8695340156555176, "learning_rate": 0.00011310963681150304, "loss": 1.7846, "step": 13247 }, { "epoch": 0.47443909252063676, "grad_norm": 1.562320590019226, "learning_rate": 0.0001130981378141859, "loss": 1.8618, "step": 13248 }, { "epoch": 0.47447490465020503, "grad_norm": 1.378989815711975, "learning_rate": 0.0001130866386406502, "loss": 1.4203, "step": 13249 }, { "epoch": 0.4745107167797733, "grad_norm": 1.7417534589767456, "learning_rate": 0.00011307513929105067, "loss": 1.4547, "step": 13250 }, { "epoch": 0.47454652890934157, "grad_norm": 2.0014612674713135, "learning_rate": 0.000113063639765542, "loss": 1.3752, "step": 13251 }, { "epoch": 0.4745823410389099, "grad_norm": 1.651104211807251, "learning_rate": 0.00011305214006427892, "loss": 1.3597, "step": 13252 }, { "epoch": 0.47461815316847816, "grad_norm": 2.426689624786377, "learning_rate": 0.00011304064018741612, "loss": 1.6382, "step": 13253 }, { "epoch": 0.47465396529804643, "grad_norm": 1.799432635307312, "learning_rate": 0.0001130291401351083, "loss": 1.5994, "step": 13254 }, { "epoch": 0.47468977742761476, "grad_norm": 1.2933775186538696, "learning_rate": 0.00011301763990751025, "loss": 1.5075, "step": 13255 }, { "epoch": 0.474725589557183, "grad_norm": 1.687794804573059, "learning_rate": 0.00011300613950477661, "loss": 1.7965, "step": 13256 }, { "epoch": 0.4747614016867513, "grad_norm": 1.6241377592086792, "learning_rate": 0.00011299463892706217, "loss": 1.7507, "step": 13257 }, { "epoch": 0.47479721381631956, "grad_norm": 1.474320650100708, "learning_rate": 0.0001129831381745216, "loss": 1.4066, "step": 13258 }, { "epoch": 0.4748330259458879, "grad_norm": 1.6746569871902466, "learning_rate": 0.00011297163724730968, "loss": 1.5502, "step": 13259 }, { "epoch": 0.47486883807545616, "grad_norm": 1.6739968061447144, "learning_rate": 0.00011296013614558107, "loss": 0.9947, "step": 13260 }, { "epoch": 0.4749046502050244, "grad_norm": 1.6017934083938599, "learning_rate": 0.00011294863486949059, "loss": 1.1272, "step": 13261 }, { "epoch": 0.47494046233459275, "grad_norm": 1.7970075607299805, "learning_rate": 0.00011293713341919292, "loss": 1.5517, "step": 13262 }, { "epoch": 0.474976274464161, "grad_norm": 2.006655216217041, "learning_rate": 0.0001129256317948428, "loss": 1.7006, "step": 13263 }, { "epoch": 0.4750120865937293, "grad_norm": 1.580830693244934, "learning_rate": 0.00011291412999659499, "loss": 1.593, "step": 13264 }, { "epoch": 0.47504789872329756, "grad_norm": 1.7830466032028198, "learning_rate": 0.00011290262802460419, "loss": 1.3509, "step": 13265 }, { "epoch": 0.4750837108528659, "grad_norm": 1.5933284759521484, "learning_rate": 0.0001128911258790252, "loss": 1.3845, "step": 13266 }, { "epoch": 0.47511952298243415, "grad_norm": 1.4569121599197388, "learning_rate": 0.00011287962356001272, "loss": 1.4742, "step": 13267 }, { "epoch": 0.4751553351120024, "grad_norm": 1.6202170848846436, "learning_rate": 0.00011286812106772153, "loss": 1.5486, "step": 13268 }, { "epoch": 0.47519114724157074, "grad_norm": 1.7830826044082642, "learning_rate": 0.00011285661840230636, "loss": 1.2932, "step": 13269 }, { "epoch": 0.475226959371139, "grad_norm": 1.6476013660430908, "learning_rate": 0.000112845115563922, "loss": 1.6752, "step": 13270 }, { "epoch": 0.4752627715007073, "grad_norm": 1.4701865911483765, "learning_rate": 0.00011283361255272315, "loss": 1.2875, "step": 13271 }, { "epoch": 0.47529858363027555, "grad_norm": 1.7325506210327148, "learning_rate": 0.00011282210936886463, "loss": 1.4609, "step": 13272 }, { "epoch": 0.4753343957598439, "grad_norm": 1.6176021099090576, "learning_rate": 0.00011281060601250113, "loss": 1.2128, "step": 13273 }, { "epoch": 0.47537020788941214, "grad_norm": 1.9186975955963135, "learning_rate": 0.00011279910248378746, "loss": 1.6815, "step": 13274 }, { "epoch": 0.4754060200189804, "grad_norm": 1.3656400442123413, "learning_rate": 0.00011278759878287839, "loss": 1.6489, "step": 13275 }, { "epoch": 0.47544183214854874, "grad_norm": 1.6302509307861328, "learning_rate": 0.00011277609490992866, "loss": 1.5046, "step": 13276 }, { "epoch": 0.475477644278117, "grad_norm": 1.7330888509750366, "learning_rate": 0.00011276459086509305, "loss": 1.3758, "step": 13277 }, { "epoch": 0.4755134564076853, "grad_norm": 1.6934552192687988, "learning_rate": 0.00011275308664852635, "loss": 1.5643, "step": 13278 }, { "epoch": 0.47554926853725354, "grad_norm": 1.8864017724990845, "learning_rate": 0.00011274158226038334, "loss": 1.5, "step": 13279 }, { "epoch": 0.47558508066682187, "grad_norm": 1.9266653060913086, "learning_rate": 0.00011273007770081873, "loss": 1.5524, "step": 13280 }, { "epoch": 0.47562089279639014, "grad_norm": 1.5648225545883179, "learning_rate": 0.00011271857296998737, "loss": 1.3658, "step": 13281 }, { "epoch": 0.4756567049259584, "grad_norm": 1.9169940948486328, "learning_rate": 0.000112707068068044, "loss": 1.7119, "step": 13282 }, { "epoch": 0.47569251705552673, "grad_norm": 1.5786782503128052, "learning_rate": 0.00011269556299514346, "loss": 1.2967, "step": 13283 }, { "epoch": 0.475728329185095, "grad_norm": 1.858866810798645, "learning_rate": 0.00011268405775144044, "loss": 1.4823, "step": 13284 }, { "epoch": 0.47576414131466327, "grad_norm": 1.7044215202331543, "learning_rate": 0.00011267255233708982, "loss": 1.3542, "step": 13285 }, { "epoch": 0.47579995344423154, "grad_norm": 1.5558875799179077, "learning_rate": 0.00011266104675224633, "loss": 1.3713, "step": 13286 }, { "epoch": 0.47583576557379986, "grad_norm": 1.7449630498886108, "learning_rate": 0.00011264954099706481, "loss": 1.3809, "step": 13287 }, { "epoch": 0.47587157770336813, "grad_norm": 1.7783395051956177, "learning_rate": 0.00011263803507170005, "loss": 1.4655, "step": 13288 }, { "epoch": 0.4759073898329364, "grad_norm": 2.3066070079803467, "learning_rate": 0.00011262652897630678, "loss": 1.5637, "step": 13289 }, { "epoch": 0.4759432019625047, "grad_norm": 1.9367107152938843, "learning_rate": 0.0001126150227110399, "loss": 1.4018, "step": 13290 }, { "epoch": 0.475979014092073, "grad_norm": 1.6617939472198486, "learning_rate": 0.00011260351627605413, "loss": 1.2903, "step": 13291 }, { "epoch": 0.47601482622164126, "grad_norm": 1.5744348764419556, "learning_rate": 0.00011259200967150432, "loss": 1.2362, "step": 13292 }, { "epoch": 0.47605063835120953, "grad_norm": 2.4343421459198, "learning_rate": 0.00011258050289754524, "loss": 1.8115, "step": 13293 }, { "epoch": 0.47608645048077786, "grad_norm": 1.7691670656204224, "learning_rate": 0.00011256899595433175, "loss": 1.4662, "step": 13294 }, { "epoch": 0.4761222626103461, "grad_norm": 1.9748578071594238, "learning_rate": 0.0001125574888420186, "loss": 1.4279, "step": 13295 }, { "epoch": 0.4761580747399144, "grad_norm": 1.6512370109558105, "learning_rate": 0.00011254598156076066, "loss": 1.7022, "step": 13296 }, { "epoch": 0.4761938868694827, "grad_norm": 2.0833606719970703, "learning_rate": 0.00011253447411071274, "loss": 1.7322, "step": 13297 }, { "epoch": 0.476229698999051, "grad_norm": 1.4301164150238037, "learning_rate": 0.00011252296649202957, "loss": 1.4129, "step": 13298 }, { "epoch": 0.47626551112861926, "grad_norm": 1.6318367719650269, "learning_rate": 0.00011251145870486612, "loss": 1.3278, "step": 13299 }, { "epoch": 0.4763013232581875, "grad_norm": 1.5819051265716553, "learning_rate": 0.00011249995074937708, "loss": 1.6432, "step": 13300 }, { "epoch": 0.47633713538775585, "grad_norm": 1.7689194679260254, "learning_rate": 0.00011248844262571737, "loss": 1.4826, "step": 13301 }, { "epoch": 0.4763729475173241, "grad_norm": 1.4098398685455322, "learning_rate": 0.00011247693433404172, "loss": 1.0356, "step": 13302 }, { "epoch": 0.4764087596468924, "grad_norm": 1.6499768495559692, "learning_rate": 0.00011246542587450504, "loss": 1.6287, "step": 13303 }, { "epoch": 0.4764445717764607, "grad_norm": 1.453571081161499, "learning_rate": 0.00011245391724726213, "loss": 1.3817, "step": 13304 }, { "epoch": 0.476480383906029, "grad_norm": 3.2583165168762207, "learning_rate": 0.00011244240845246783, "loss": 1.4521, "step": 13305 }, { "epoch": 0.47651619603559725, "grad_norm": 1.647336721420288, "learning_rate": 0.00011243089949027699, "loss": 1.613, "step": 13306 }, { "epoch": 0.4765520081651655, "grad_norm": 1.7628077268600464, "learning_rate": 0.0001124193903608444, "loss": 1.5009, "step": 13307 }, { "epoch": 0.47658782029473384, "grad_norm": 1.7588131427764893, "learning_rate": 0.00011240788106432496, "loss": 1.691, "step": 13308 }, { "epoch": 0.4766236324243021, "grad_norm": 2.2901549339294434, "learning_rate": 0.00011239637160087346, "loss": 1.419, "step": 13309 }, { "epoch": 0.4766594445538704, "grad_norm": 1.769014596939087, "learning_rate": 0.00011238486197064479, "loss": 1.4347, "step": 13310 }, { "epoch": 0.4766952566834387, "grad_norm": 1.45048987865448, "learning_rate": 0.00011237335217379377, "loss": 1.4141, "step": 13311 }, { "epoch": 0.476731068813007, "grad_norm": 2.432612895965576, "learning_rate": 0.00011236184221047526, "loss": 1.8489, "step": 13312 }, { "epoch": 0.47676688094257524, "grad_norm": 1.4043242931365967, "learning_rate": 0.00011235033208084411, "loss": 1.5789, "step": 13313 }, { "epoch": 0.4768026930721435, "grad_norm": 1.6627109050750732, "learning_rate": 0.00011233882178505519, "loss": 1.4942, "step": 13314 }, { "epoch": 0.47683850520171184, "grad_norm": 2.114083766937256, "learning_rate": 0.00011232731132326331, "loss": 1.534, "step": 13315 }, { "epoch": 0.4768743173312801, "grad_norm": 1.5076711177825928, "learning_rate": 0.00011231580069562335, "loss": 1.359, "step": 13316 }, { "epoch": 0.4769101294608484, "grad_norm": 1.8410205841064453, "learning_rate": 0.0001123042899022902, "loss": 1.624, "step": 13317 }, { "epoch": 0.4769459415904167, "grad_norm": 1.6031441688537598, "learning_rate": 0.00011229277894341869, "loss": 1.6562, "step": 13318 }, { "epoch": 0.47698175371998497, "grad_norm": 1.749197006225586, "learning_rate": 0.0001122812678191637, "loss": 1.6577, "step": 13319 }, { "epoch": 0.47701756584955324, "grad_norm": 2.062668800354004, "learning_rate": 0.00011226975652968011, "loss": 1.5842, "step": 13320 }, { "epoch": 0.4770533779791215, "grad_norm": 2.1983795166015625, "learning_rate": 0.00011225824507512275, "loss": 1.3001, "step": 13321 }, { "epoch": 0.47708919010868983, "grad_norm": 1.686004400253296, "learning_rate": 0.00011224673345564651, "loss": 1.5864, "step": 13322 }, { "epoch": 0.4771250022382581, "grad_norm": 1.8390787839889526, "learning_rate": 0.0001122352216714063, "loss": 1.6522, "step": 13323 }, { "epoch": 0.47716081436782637, "grad_norm": 1.6921519041061401, "learning_rate": 0.00011222370972255694, "loss": 1.4168, "step": 13324 }, { "epoch": 0.4771966264973947, "grad_norm": 1.567881464958191, "learning_rate": 0.00011221219760925334, "loss": 1.4169, "step": 13325 }, { "epoch": 0.47723243862696296, "grad_norm": 1.9966059923171997, "learning_rate": 0.00011220068533165036, "loss": 1.4439, "step": 13326 }, { "epoch": 0.47726825075653123, "grad_norm": 1.6342418193817139, "learning_rate": 0.00011218917288990292, "loss": 1.4409, "step": 13327 }, { "epoch": 0.4773040628860995, "grad_norm": 1.9518680572509766, "learning_rate": 0.00011217766028416585, "loss": 1.5651, "step": 13328 }, { "epoch": 0.4773398750156678, "grad_norm": 1.5526176691055298, "learning_rate": 0.00011216614751459408, "loss": 1.6035, "step": 13329 }, { "epoch": 0.4773756871452361, "grad_norm": 1.5337470769882202, "learning_rate": 0.00011215463458134252, "loss": 1.6495, "step": 13330 }, { "epoch": 0.47741149927480436, "grad_norm": 2.321065902709961, "learning_rate": 0.000112143121484566, "loss": 1.6215, "step": 13331 }, { "epoch": 0.4774473114043727, "grad_norm": 1.355303168296814, "learning_rate": 0.00011213160822441948, "loss": 1.6907, "step": 13332 }, { "epoch": 0.47748312353394096, "grad_norm": 1.7356313467025757, "learning_rate": 0.00011212009480105777, "loss": 1.5611, "step": 13333 }, { "epoch": 0.4775189356635092, "grad_norm": 2.1936237812042236, "learning_rate": 0.00011210858121463586, "loss": 1.4047, "step": 13334 }, { "epoch": 0.4775547477930775, "grad_norm": 1.9643810987472534, "learning_rate": 0.00011209706746530858, "loss": 1.6057, "step": 13335 }, { "epoch": 0.4775905599226458, "grad_norm": 1.537932276725769, "learning_rate": 0.00011208555355323088, "loss": 1.3793, "step": 13336 }, { "epoch": 0.4776263720522141, "grad_norm": 1.4360474348068237, "learning_rate": 0.00011207403947855761, "loss": 1.2832, "step": 13337 }, { "epoch": 0.47766218418178236, "grad_norm": 1.8667532205581665, "learning_rate": 0.00011206252524144373, "loss": 1.3285, "step": 13338 }, { "epoch": 0.4776979963113507, "grad_norm": 1.374731183052063, "learning_rate": 0.00011205101084204414, "loss": 1.4788, "step": 13339 }, { "epoch": 0.47773380844091895, "grad_norm": 1.8810253143310547, "learning_rate": 0.00011203949628051376, "loss": 1.4788, "step": 13340 }, { "epoch": 0.4777696205704872, "grad_norm": 1.5324774980545044, "learning_rate": 0.00011202798155700748, "loss": 1.472, "step": 13341 }, { "epoch": 0.4778054327000555, "grad_norm": 1.8011895418167114, "learning_rate": 0.0001120164666716802, "loss": 1.3606, "step": 13342 }, { "epoch": 0.4778412448296238, "grad_norm": 1.6887016296386719, "learning_rate": 0.0001120049516246869, "loss": 1.562, "step": 13343 }, { "epoch": 0.4778770569591921, "grad_norm": 1.4731894731521606, "learning_rate": 0.0001119934364161824, "loss": 1.3769, "step": 13344 }, { "epoch": 0.47791286908876035, "grad_norm": 1.3754960298538208, "learning_rate": 0.00011198192104632174, "loss": 1.2356, "step": 13345 }, { "epoch": 0.4779486812183287, "grad_norm": 1.9692893028259277, "learning_rate": 0.00011197040551525977, "loss": 1.4277, "step": 13346 }, { "epoch": 0.47798449334789694, "grad_norm": 1.330018401145935, "learning_rate": 0.00011195888982315144, "loss": 1.4329, "step": 13347 }, { "epoch": 0.4780203054774652, "grad_norm": 2.0799808502197266, "learning_rate": 0.00011194737397015164, "loss": 1.5684, "step": 13348 }, { "epoch": 0.4780561176070335, "grad_norm": 1.6616966724395752, "learning_rate": 0.00011193585795641539, "loss": 1.2649, "step": 13349 }, { "epoch": 0.4780919297366018, "grad_norm": 1.8805179595947266, "learning_rate": 0.00011192434178209755, "loss": 1.3895, "step": 13350 }, { "epoch": 0.4781277418661701, "grad_norm": 1.4745457172393799, "learning_rate": 0.00011191282544735304, "loss": 1.6853, "step": 13351 }, { "epoch": 0.47816355399573834, "grad_norm": 1.5154930353164673, "learning_rate": 0.00011190130895233686, "loss": 1.5363, "step": 13352 }, { "epoch": 0.47819936612530667, "grad_norm": 1.273663878440857, "learning_rate": 0.00011188979229720389, "loss": 1.5082, "step": 13353 }, { "epoch": 0.47823517825487494, "grad_norm": 1.5829232931137085, "learning_rate": 0.00011187827548210915, "loss": 1.4881, "step": 13354 }, { "epoch": 0.4782709903844432, "grad_norm": 2.338301658630371, "learning_rate": 0.00011186675850720749, "loss": 1.3946, "step": 13355 }, { "epoch": 0.4783068025140115, "grad_norm": 1.3622664213180542, "learning_rate": 0.00011185524137265393, "loss": 1.5696, "step": 13356 }, { "epoch": 0.4783426146435798, "grad_norm": 2.10280179977417, "learning_rate": 0.00011184372407860336, "loss": 1.7405, "step": 13357 }, { "epoch": 0.47837842677314807, "grad_norm": 1.4048125743865967, "learning_rate": 0.00011183220662521079, "loss": 1.6725, "step": 13358 }, { "epoch": 0.47841423890271634, "grad_norm": 1.3935976028442383, "learning_rate": 0.00011182068901263114, "loss": 1.3918, "step": 13359 }, { "epoch": 0.47845005103228466, "grad_norm": 2.3466100692749023, "learning_rate": 0.00011180917124101936, "loss": 1.7777, "step": 13360 }, { "epoch": 0.47848586316185293, "grad_norm": 1.657720923423767, "learning_rate": 0.0001117976533105304, "loss": 1.563, "step": 13361 }, { "epoch": 0.4785216752914212, "grad_norm": 1.7979260683059692, "learning_rate": 0.00011178613522131924, "loss": 1.2531, "step": 13362 }, { "epoch": 0.47855748742098947, "grad_norm": 1.7003462314605713, "learning_rate": 0.00011177461697354084, "loss": 1.6431, "step": 13363 }, { "epoch": 0.4785932995505578, "grad_norm": 1.7036478519439697, "learning_rate": 0.00011176309856735014, "loss": 1.3807, "step": 13364 }, { "epoch": 0.47862911168012606, "grad_norm": 1.8643072843551636, "learning_rate": 0.00011175158000290216, "loss": 1.6466, "step": 13365 }, { "epoch": 0.47866492380969433, "grad_norm": 1.696761131286621, "learning_rate": 0.00011174006128035178, "loss": 1.4672, "step": 13366 }, { "epoch": 0.47870073593926266, "grad_norm": 1.6161099672317505, "learning_rate": 0.00011172854239985409, "loss": 1.4775, "step": 13367 }, { "epoch": 0.4787365480688309, "grad_norm": 1.5913996696472168, "learning_rate": 0.0001117170233615639, "loss": 1.2976, "step": 13368 }, { "epoch": 0.4787723601983992, "grad_norm": 1.3425949811935425, "learning_rate": 0.00011170550416563634, "loss": 1.2461, "step": 13369 }, { "epoch": 0.47880817232796746, "grad_norm": 1.8969595432281494, "learning_rate": 0.0001116939848122263, "loss": 1.4931, "step": 13370 }, { "epoch": 0.4788439844575358, "grad_norm": 1.8349109888076782, "learning_rate": 0.00011168246530148876, "loss": 1.6161, "step": 13371 }, { "epoch": 0.47887979658710406, "grad_norm": 2.742892265319824, "learning_rate": 0.00011167094563357876, "loss": 1.6424, "step": 13372 }, { "epoch": 0.4789156087166723, "grad_norm": 3.602156162261963, "learning_rate": 0.00011165942580865118, "loss": 1.5828, "step": 13373 }, { "epoch": 0.4789514208462406, "grad_norm": 1.6566842794418335, "learning_rate": 0.00011164790582686113, "loss": 1.4616, "step": 13374 }, { "epoch": 0.4789872329758089, "grad_norm": 1.709389567375183, "learning_rate": 0.0001116363856883635, "loss": 1.5164, "step": 13375 }, { "epoch": 0.4790230451053772, "grad_norm": 2.565922498703003, "learning_rate": 0.00011162486539331334, "loss": 1.4494, "step": 13376 }, { "epoch": 0.47905885723494546, "grad_norm": 1.883772373199463, "learning_rate": 0.00011161334494186557, "loss": 1.4987, "step": 13377 }, { "epoch": 0.4790946693645138, "grad_norm": 1.9089127779006958, "learning_rate": 0.00011160182433417525, "loss": 1.4315, "step": 13378 }, { "epoch": 0.47913048149408205, "grad_norm": 1.619033932685852, "learning_rate": 0.00011159030357039733, "loss": 1.2727, "step": 13379 }, { "epoch": 0.4791662936236503, "grad_norm": 1.3101295232772827, "learning_rate": 0.00011157878265068685, "loss": 1.4185, "step": 13380 }, { "epoch": 0.4792021057532186, "grad_norm": 2.5190060138702393, "learning_rate": 0.00011156726157519877, "loss": 1.8318, "step": 13381 }, { "epoch": 0.4792379178827869, "grad_norm": 1.801878809928894, "learning_rate": 0.00011155574034408812, "loss": 1.5164, "step": 13382 }, { "epoch": 0.4792737300123552, "grad_norm": 1.659848928451538, "learning_rate": 0.00011154421895750984, "loss": 1.3737, "step": 13383 }, { "epoch": 0.47930954214192345, "grad_norm": 1.8164340257644653, "learning_rate": 0.00011153269741561905, "loss": 1.5673, "step": 13384 }, { "epoch": 0.4793453542714918, "grad_norm": 1.5826667547225952, "learning_rate": 0.00011152117571857065, "loss": 1.0404, "step": 13385 }, { "epoch": 0.47938116640106004, "grad_norm": 1.6665927171707153, "learning_rate": 0.0001115096538665197, "loss": 1.4599, "step": 13386 }, { "epoch": 0.4794169785306283, "grad_norm": 2.6693007946014404, "learning_rate": 0.00011149813185962124, "loss": 1.4425, "step": 13387 }, { "epoch": 0.4794527906601966, "grad_norm": 1.5640629529953003, "learning_rate": 0.00011148660969803019, "loss": 2.0646, "step": 13388 }, { "epoch": 0.4794886027897649, "grad_norm": 1.4011385440826416, "learning_rate": 0.00011147508738190167, "loss": 1.5649, "step": 13389 }, { "epoch": 0.4795244149193332, "grad_norm": 1.667718529701233, "learning_rate": 0.0001114635649113906, "loss": 1.4943, "step": 13390 }, { "epoch": 0.47956022704890144, "grad_norm": 1.893916368484497, "learning_rate": 0.00011145204228665209, "loss": 1.2751, "step": 13391 }, { "epoch": 0.47959603917846977, "grad_norm": 1.5027039051055908, "learning_rate": 0.00011144051950784111, "loss": 1.395, "step": 13392 }, { "epoch": 0.47963185130803804, "grad_norm": 1.4640353918075562, "learning_rate": 0.00011142899657511272, "loss": 1.5298, "step": 13393 }, { "epoch": 0.4796676634376063, "grad_norm": 1.6055121421813965, "learning_rate": 0.00011141747348862191, "loss": 1.5649, "step": 13394 }, { "epoch": 0.4797034755671746, "grad_norm": 1.6210955381393433, "learning_rate": 0.00011140595024852369, "loss": 1.6393, "step": 13395 }, { "epoch": 0.4797392876967429, "grad_norm": 1.5539766550064087, "learning_rate": 0.00011139442685497317, "loss": 1.6614, "step": 13396 }, { "epoch": 0.47977509982631117, "grad_norm": 2.0545995235443115, "learning_rate": 0.0001113829033081253, "loss": 1.3625, "step": 13397 }, { "epoch": 0.47981091195587944, "grad_norm": 1.3574188947677612, "learning_rate": 0.00011137137960813517, "loss": 1.34, "step": 13398 }, { "epoch": 0.47984672408544776, "grad_norm": 1.6591427326202393, "learning_rate": 0.00011135985575515778, "loss": 1.6447, "step": 13399 }, { "epoch": 0.47988253621501603, "grad_norm": 1.2985018491744995, "learning_rate": 0.0001113483317493482, "loss": 1.4672, "step": 13400 }, { "epoch": 0.4799183483445843, "grad_norm": 1.8751863241195679, "learning_rate": 0.00011133680759086145, "loss": 1.725, "step": 13401 }, { "epoch": 0.47995416047415257, "grad_norm": 1.4524515867233276, "learning_rate": 0.00011132528327985256, "loss": 1.6149, "step": 13402 }, { "epoch": 0.4799899726037209, "grad_norm": 1.3275116682052612, "learning_rate": 0.00011131375881647664, "loss": 1.1616, "step": 13403 }, { "epoch": 0.48002578473328916, "grad_norm": 1.6403449773788452, "learning_rate": 0.00011130223420088864, "loss": 1.2983, "step": 13404 }, { "epoch": 0.48006159686285743, "grad_norm": 1.282675862312317, "learning_rate": 0.00011129070943324366, "loss": 1.3949, "step": 13405 }, { "epoch": 0.48009740899242576, "grad_norm": 1.388199806213379, "learning_rate": 0.00011127918451369676, "loss": 1.4264, "step": 13406 }, { "epoch": 0.480133221121994, "grad_norm": 1.4312509298324585, "learning_rate": 0.00011126765944240298, "loss": 1.4627, "step": 13407 }, { "epoch": 0.4801690332515623, "grad_norm": 1.5060590505599976, "learning_rate": 0.00011125613421951737, "loss": 1.3497, "step": 13408 }, { "epoch": 0.48020484538113056, "grad_norm": 1.3689676523208618, "learning_rate": 0.00011124460884519503, "loss": 1.4491, "step": 13409 }, { "epoch": 0.4802406575106989, "grad_norm": 1.402147650718689, "learning_rate": 0.00011123308331959093, "loss": 1.5145, "step": 13410 }, { "epoch": 0.48027646964026716, "grad_norm": 1.3701300621032715, "learning_rate": 0.00011122155764286024, "loss": 1.3315, "step": 13411 }, { "epoch": 0.4803122817698354, "grad_norm": 1.9080400466918945, "learning_rate": 0.00011121003181515792, "loss": 1.461, "step": 13412 }, { "epoch": 0.48034809389940375, "grad_norm": 2.7309305667877197, "learning_rate": 0.00011119850583663908, "loss": 1.8333, "step": 13413 }, { "epoch": 0.480383906028972, "grad_norm": 2.27876615524292, "learning_rate": 0.00011118697970745881, "loss": 1.5516, "step": 13414 }, { "epoch": 0.4804197181585403, "grad_norm": 1.6473275423049927, "learning_rate": 0.00011117545342777215, "loss": 1.3657, "step": 13415 }, { "epoch": 0.48045553028810856, "grad_norm": 1.6574909687042236, "learning_rate": 0.0001111639269977342, "loss": 1.6839, "step": 13416 }, { "epoch": 0.4804913424176769, "grad_norm": 1.214024305343628, "learning_rate": 0.00011115240041749999, "loss": 1.4461, "step": 13417 }, { "epoch": 0.48052715454724515, "grad_norm": 2.024338960647583, "learning_rate": 0.00011114087368722463, "loss": 1.2144, "step": 13418 }, { "epoch": 0.4805629666768134, "grad_norm": 1.9191267490386963, "learning_rate": 0.00011112934680706317, "loss": 1.5104, "step": 13419 }, { "epoch": 0.48059877880638174, "grad_norm": 1.7122324705123901, "learning_rate": 0.00011111781977717075, "loss": 1.2507, "step": 13420 }, { "epoch": 0.48063459093595, "grad_norm": 1.6362801790237427, "learning_rate": 0.00011110629259770235, "loss": 1.6835, "step": 13421 }, { "epoch": 0.4806704030655183, "grad_norm": 1.6250897645950317, "learning_rate": 0.00011109476526881313, "loss": 1.4953, "step": 13422 }, { "epoch": 0.48070621519508655, "grad_norm": 1.9805313348770142, "learning_rate": 0.00011108323779065814, "loss": 1.5814, "step": 13423 }, { "epoch": 0.4807420273246549, "grad_norm": 1.6796422004699707, "learning_rate": 0.00011107171016339251, "loss": 1.6619, "step": 13424 }, { "epoch": 0.48077783945422314, "grad_norm": 1.6529179811477661, "learning_rate": 0.00011106018238717128, "loss": 1.4423, "step": 13425 }, { "epoch": 0.4808136515837914, "grad_norm": 1.6704829931259155, "learning_rate": 0.00011104865446214957, "loss": 1.3773, "step": 13426 }, { "epoch": 0.48084946371335974, "grad_norm": 1.3127925395965576, "learning_rate": 0.00011103712638848244, "loss": 1.5127, "step": 13427 }, { "epoch": 0.480885275842928, "grad_norm": 1.6566414833068848, "learning_rate": 0.00011102559816632507, "loss": 1.6901, "step": 13428 }, { "epoch": 0.4809210879724963, "grad_norm": 1.6600404977798462, "learning_rate": 0.00011101406979583246, "loss": 1.3477, "step": 13429 }, { "epoch": 0.48095690010206454, "grad_norm": 2.285459518432617, "learning_rate": 0.00011100254127715975, "loss": 1.3787, "step": 13430 }, { "epoch": 0.48099271223163287, "grad_norm": 1.7669966220855713, "learning_rate": 0.00011099101261046205, "loss": 1.4803, "step": 13431 }, { "epoch": 0.48102852436120114, "grad_norm": 1.491285800933838, "learning_rate": 0.00011097948379589444, "loss": 1.4013, "step": 13432 }, { "epoch": 0.4810643364907694, "grad_norm": 1.9096039533615112, "learning_rate": 0.00011096795483361205, "loss": 1.3939, "step": 13433 }, { "epoch": 0.48110014862033773, "grad_norm": 1.4066028594970703, "learning_rate": 0.00011095642572376996, "loss": 1.4652, "step": 13434 }, { "epoch": 0.481135960749906, "grad_norm": 1.5006831884384155, "learning_rate": 0.0001109448964665233, "loss": 1.3876, "step": 13435 }, { "epoch": 0.48117177287947427, "grad_norm": 1.5165512561798096, "learning_rate": 0.00011093336706202717, "loss": 1.696, "step": 13436 }, { "epoch": 0.48120758500904254, "grad_norm": 1.273987889289856, "learning_rate": 0.00011092183751043672, "loss": 1.608, "step": 13437 }, { "epoch": 0.48124339713861086, "grad_norm": 1.6874688863754272, "learning_rate": 0.000110910307811907, "loss": 1.301, "step": 13438 }, { "epoch": 0.48127920926817913, "grad_norm": 1.5528160333633423, "learning_rate": 0.00011089877796659319, "loss": 1.3853, "step": 13439 }, { "epoch": 0.4813150213977474, "grad_norm": 1.5378245115280151, "learning_rate": 0.00011088724797465036, "loss": 1.5139, "step": 13440 }, { "epoch": 0.4813508335273157, "grad_norm": 2.0010392665863037, "learning_rate": 0.00011087571783623365, "loss": 1.5756, "step": 13441 }, { "epoch": 0.481386645656884, "grad_norm": 1.7677205801010132, "learning_rate": 0.0001108641875514982, "loss": 1.336, "step": 13442 }, { "epoch": 0.48142245778645226, "grad_norm": 2.936492443084717, "learning_rate": 0.00011085265712059909, "loss": 1.787, "step": 13443 }, { "epoch": 0.48145826991602053, "grad_norm": 1.785649061203003, "learning_rate": 0.00011084112654369152, "loss": 1.2747, "step": 13444 }, { "epoch": 0.48149408204558886, "grad_norm": 1.6284798383712769, "learning_rate": 0.00011082959582093055, "loss": 1.3396, "step": 13445 }, { "epoch": 0.4815298941751571, "grad_norm": 1.441011667251587, "learning_rate": 0.00011081806495247136, "loss": 1.6317, "step": 13446 }, { "epoch": 0.4815657063047254, "grad_norm": 1.789101004600525, "learning_rate": 0.00011080653393846905, "loss": 1.6096, "step": 13447 }, { "epoch": 0.4816015184342937, "grad_norm": 2.5847105979919434, "learning_rate": 0.00011079500277907875, "loss": 1.321, "step": 13448 }, { "epoch": 0.481637330563862, "grad_norm": 1.5998847484588623, "learning_rate": 0.00011078347147445563, "loss": 1.5482, "step": 13449 }, { "epoch": 0.48167314269343026, "grad_norm": 1.8561943769454956, "learning_rate": 0.0001107719400247548, "loss": 1.3777, "step": 13450 }, { "epoch": 0.4817089548229985, "grad_norm": 2.0072786808013916, "learning_rate": 0.00011076040843013141, "loss": 1.149, "step": 13451 }, { "epoch": 0.48174476695256685, "grad_norm": 1.9042658805847168, "learning_rate": 0.00011074887669074058, "loss": 1.3163, "step": 13452 }, { "epoch": 0.4817805790821351, "grad_norm": 1.6006115674972534, "learning_rate": 0.00011073734480673754, "loss": 1.5273, "step": 13453 }, { "epoch": 0.4818163912117034, "grad_norm": 1.7646973133087158, "learning_rate": 0.00011072581277827732, "loss": 1.5288, "step": 13454 }, { "epoch": 0.4818522033412717, "grad_norm": 2.283583641052246, "learning_rate": 0.00011071428060551517, "loss": 1.7976, "step": 13455 }, { "epoch": 0.48188801547084, "grad_norm": 1.4994382858276367, "learning_rate": 0.00011070274828860618, "loss": 1.3215, "step": 13456 }, { "epoch": 0.48192382760040825, "grad_norm": 1.39043128490448, "learning_rate": 0.0001106912158277055, "loss": 1.3991, "step": 13457 }, { "epoch": 0.4819596397299765, "grad_norm": 1.6843230724334717, "learning_rate": 0.00011067968322296831, "loss": 1.6234, "step": 13458 }, { "epoch": 0.48199545185954484, "grad_norm": 1.7703015804290771, "learning_rate": 0.00011066815047454975, "loss": 1.517, "step": 13459 }, { "epoch": 0.4820312639891131, "grad_norm": 1.695458173751831, "learning_rate": 0.00011065661758260502, "loss": 1.6138, "step": 13460 }, { "epoch": 0.4820670761186814, "grad_norm": 1.6094825267791748, "learning_rate": 0.00011064508454728921, "loss": 1.3752, "step": 13461 }, { "epoch": 0.4821028882482497, "grad_norm": 1.7632977962493896, "learning_rate": 0.00011063355136875753, "loss": 1.541, "step": 13462 }, { "epoch": 0.482138700377818, "grad_norm": 1.7887697219848633, "learning_rate": 0.00011062201804716512, "loss": 1.5856, "step": 13463 }, { "epoch": 0.48217451250738624, "grad_norm": 1.7318227291107178, "learning_rate": 0.0001106104845826672, "loss": 1.2289, "step": 13464 }, { "epoch": 0.4822103246369545, "grad_norm": 1.6601182222366333, "learning_rate": 0.00011059895097541888, "loss": 1.875, "step": 13465 }, { "epoch": 0.48224613676652284, "grad_norm": 1.9784289598464966, "learning_rate": 0.00011058741722557533, "loss": 1.3676, "step": 13466 }, { "epoch": 0.4822819488960911, "grad_norm": 1.8831995725631714, "learning_rate": 0.00011057588333329174, "loss": 1.6114, "step": 13467 }, { "epoch": 0.4823177610256594, "grad_norm": 1.6999905109405518, "learning_rate": 0.00011056434929872325, "loss": 1.6761, "step": 13468 }, { "epoch": 0.4823535731552277, "grad_norm": 1.5982940196990967, "learning_rate": 0.00011055281512202513, "loss": 1.4517, "step": 13469 }, { "epoch": 0.48238938528479597, "grad_norm": 2.8717644214630127, "learning_rate": 0.00011054128080335246, "loss": 1.6728, "step": 13470 }, { "epoch": 0.48242519741436424, "grad_norm": 1.9627916812896729, "learning_rate": 0.00011052974634286046, "loss": 1.7458, "step": 13471 }, { "epoch": 0.4824610095439325, "grad_norm": 1.698922872543335, "learning_rate": 0.00011051821174070429, "loss": 1.8436, "step": 13472 }, { "epoch": 0.48249682167350083, "grad_norm": 1.2740743160247803, "learning_rate": 0.0001105066769970392, "loss": 1.5197, "step": 13473 }, { "epoch": 0.4825326338030691, "grad_norm": 1.7765341997146606, "learning_rate": 0.00011049514211202028, "loss": 1.8556, "step": 13474 }, { "epoch": 0.48256844593263737, "grad_norm": 1.9646724462509155, "learning_rate": 0.00011048360708580279, "loss": 1.5785, "step": 13475 }, { "epoch": 0.4826042580622057, "grad_norm": 2.2040209770202637, "learning_rate": 0.00011047207191854185, "loss": 1.5735, "step": 13476 }, { "epoch": 0.48264007019177396, "grad_norm": 1.3888829946517944, "learning_rate": 0.00011046053661039273, "loss": 1.4665, "step": 13477 }, { "epoch": 0.48267588232134223, "grad_norm": 1.831881046295166, "learning_rate": 0.00011044900116151053, "loss": 1.6623, "step": 13478 }, { "epoch": 0.4827116944509105, "grad_norm": 1.4265269041061401, "learning_rate": 0.00011043746557205054, "loss": 1.5404, "step": 13479 }, { "epoch": 0.4827475065804788, "grad_norm": 1.828692078590393, "learning_rate": 0.0001104259298421679, "loss": 1.5943, "step": 13480 }, { "epoch": 0.4827833187100471, "grad_norm": 1.9613791704177856, "learning_rate": 0.00011041439397201785, "loss": 1.6634, "step": 13481 }, { "epoch": 0.48281913083961536, "grad_norm": 1.9883949756622314, "learning_rate": 0.00011040285796175553, "loss": 1.19, "step": 13482 }, { "epoch": 0.4828549429691837, "grad_norm": 1.7061982154846191, "learning_rate": 0.00011039132181153618, "loss": 1.5244, "step": 13483 }, { "epoch": 0.48289075509875196, "grad_norm": 1.7107230424880981, "learning_rate": 0.00011037978552151502, "loss": 1.4301, "step": 13484 }, { "epoch": 0.4829265672283202, "grad_norm": 2.234708070755005, "learning_rate": 0.0001103682490918472, "loss": 1.8736, "step": 13485 }, { "epoch": 0.4829623793578885, "grad_norm": 1.9554941654205322, "learning_rate": 0.000110356712522688, "loss": 1.6488, "step": 13486 }, { "epoch": 0.4829981914874568, "grad_norm": 1.7168772220611572, "learning_rate": 0.00011034517581419255, "loss": 1.7765, "step": 13487 }, { "epoch": 0.4830340036170251, "grad_norm": 1.7673360109329224, "learning_rate": 0.00011033363896651613, "loss": 1.3514, "step": 13488 }, { "epoch": 0.48306981574659336, "grad_norm": 1.7775872945785522, "learning_rate": 0.00011032210197981392, "loss": 1.2557, "step": 13489 }, { "epoch": 0.4831056278761617, "grad_norm": 1.549501895904541, "learning_rate": 0.00011031056485424116, "loss": 1.4594, "step": 13490 }, { "epoch": 0.48314144000572995, "grad_norm": 1.6182304620742798, "learning_rate": 0.00011029902758995304, "loss": 1.4751, "step": 13491 }, { "epoch": 0.4831772521352982, "grad_norm": 1.4240484237670898, "learning_rate": 0.00011028749018710478, "loss": 1.3701, "step": 13492 }, { "epoch": 0.4832130642648665, "grad_norm": 1.6473289728164673, "learning_rate": 0.00011027595264585162, "loss": 1.2206, "step": 13493 }, { "epoch": 0.4832488763944348, "grad_norm": 2.2116892337799072, "learning_rate": 0.00011026441496634874, "loss": 1.6348, "step": 13494 }, { "epoch": 0.4832846885240031, "grad_norm": 1.27376127243042, "learning_rate": 0.00011025287714875143, "loss": 1.3734, "step": 13495 }, { "epoch": 0.48332050065357135, "grad_norm": 1.800029993057251, "learning_rate": 0.00011024133919321486, "loss": 1.5834, "step": 13496 }, { "epoch": 0.4833563127831397, "grad_norm": 1.679153561592102, "learning_rate": 0.00011022980109989431, "loss": 1.4844, "step": 13497 }, { "epoch": 0.48339212491270794, "grad_norm": 2.0695579051971436, "learning_rate": 0.00011021826286894496, "loss": 1.7796, "step": 13498 }, { "epoch": 0.4834279370422762, "grad_norm": 1.740222692489624, "learning_rate": 0.0001102067245005221, "loss": 1.4109, "step": 13499 }, { "epoch": 0.4834637491718445, "grad_norm": 1.6901319026947021, "learning_rate": 0.0001101951859947809, "loss": 1.8769, "step": 13500 }, { "epoch": 0.4834995613014128, "grad_norm": 1.6777935028076172, "learning_rate": 0.00011018364735187661, "loss": 1.4192, "step": 13501 }, { "epoch": 0.4835353734309811, "grad_norm": 1.7092137336730957, "learning_rate": 0.00011017210857196449, "loss": 1.6799, "step": 13502 }, { "epoch": 0.48357118556054934, "grad_norm": 1.8037439584732056, "learning_rate": 0.00011016056965519979, "loss": 1.5957, "step": 13503 }, { "epoch": 0.48360699769011767, "grad_norm": 1.3701008558273315, "learning_rate": 0.00011014903060173772, "loss": 1.6095, "step": 13504 }, { "epoch": 0.48364280981968594, "grad_norm": 1.4158246517181396, "learning_rate": 0.00011013749141173351, "loss": 1.3914, "step": 13505 }, { "epoch": 0.4836786219492542, "grad_norm": 1.9191498756408691, "learning_rate": 0.00011012595208534247, "loss": 1.9305, "step": 13506 }, { "epoch": 0.4837144340788225, "grad_norm": 2.0347769260406494, "learning_rate": 0.00011011441262271975, "loss": 1.3077, "step": 13507 }, { "epoch": 0.4837502462083908, "grad_norm": 1.7553139925003052, "learning_rate": 0.00011010287302402073, "loss": 1.2249, "step": 13508 }, { "epoch": 0.48378605833795907, "grad_norm": 1.331820011138916, "learning_rate": 0.00011009133328940053, "loss": 1.5748, "step": 13509 }, { "epoch": 0.48382187046752734, "grad_norm": 1.791427731513977, "learning_rate": 0.00011007979341901446, "loss": 1.4737, "step": 13510 }, { "epoch": 0.48385768259709566, "grad_norm": 1.544029951095581, "learning_rate": 0.0001100682534130178, "loss": 1.4346, "step": 13511 }, { "epoch": 0.48389349472666393, "grad_norm": 1.728143334388733, "learning_rate": 0.00011005671327156574, "loss": 1.5266, "step": 13512 }, { "epoch": 0.4839293068562322, "grad_norm": 1.5404763221740723, "learning_rate": 0.00011004517299481363, "loss": 1.4122, "step": 13513 }, { "epoch": 0.48396511898580047, "grad_norm": 1.9863710403442383, "learning_rate": 0.00011003363258291664, "loss": 1.6186, "step": 13514 }, { "epoch": 0.4840009311153688, "grad_norm": 2.0675201416015625, "learning_rate": 0.00011002209203603007, "loss": 1.4077, "step": 13515 }, { "epoch": 0.48403674324493706, "grad_norm": 1.5609140396118164, "learning_rate": 0.00011001055135430916, "loss": 1.3322, "step": 13516 }, { "epoch": 0.48407255537450533, "grad_norm": 1.3592745065689087, "learning_rate": 0.00010999901053790924, "loss": 1.5969, "step": 13517 }, { "epoch": 0.48410836750407366, "grad_norm": 2.3698625564575195, "learning_rate": 0.0001099874695869855, "loss": 1.474, "step": 13518 }, { "epoch": 0.4841441796336419, "grad_norm": 2.192758798599243, "learning_rate": 0.00010997592850169325, "loss": 1.759, "step": 13519 }, { "epoch": 0.4841799917632102, "grad_norm": 1.3578786849975586, "learning_rate": 0.00010996438728218772, "loss": 1.4524, "step": 13520 }, { "epoch": 0.48421580389277846, "grad_norm": 1.4247230291366577, "learning_rate": 0.00010995284592862425, "loss": 1.782, "step": 13521 }, { "epoch": 0.4842516160223468, "grad_norm": 2.091456890106201, "learning_rate": 0.00010994130444115804, "loss": 1.5596, "step": 13522 }, { "epoch": 0.48428742815191506, "grad_norm": 2.0627150535583496, "learning_rate": 0.00010992976281994443, "loss": 1.3551, "step": 13523 }, { "epoch": 0.4843232402814833, "grad_norm": 1.4531793594360352, "learning_rate": 0.00010991822106513867, "loss": 1.4569, "step": 13524 }, { "epoch": 0.48435905241105165, "grad_norm": 2.3870937824249268, "learning_rate": 0.00010990667917689603, "loss": 1.7541, "step": 13525 }, { "epoch": 0.4843948645406199, "grad_norm": 1.8330572843551636, "learning_rate": 0.00010989513715537184, "loss": 1.6933, "step": 13526 }, { "epoch": 0.4844306766701882, "grad_norm": 1.6797586679458618, "learning_rate": 0.00010988359500072128, "loss": 1.3094, "step": 13527 }, { "epoch": 0.48446648879975646, "grad_norm": 1.7150591611862183, "learning_rate": 0.00010987205271309972, "loss": 1.5811, "step": 13528 }, { "epoch": 0.4845023009293248, "grad_norm": 1.74473237991333, "learning_rate": 0.00010986051029266242, "loss": 1.6228, "step": 13529 }, { "epoch": 0.48453811305889305, "grad_norm": 2.2052595615386963, "learning_rate": 0.0001098489677395647, "loss": 1.8501, "step": 13530 }, { "epoch": 0.4845739251884613, "grad_norm": 1.872136116027832, "learning_rate": 0.0001098374250539618, "loss": 1.5437, "step": 13531 }, { "epoch": 0.48460973731802964, "grad_norm": 2.1946043968200684, "learning_rate": 0.00010982588223600905, "loss": 1.2338, "step": 13532 }, { "epoch": 0.4846455494475979, "grad_norm": 1.7079534530639648, "learning_rate": 0.00010981433928586168, "loss": 1.8226, "step": 13533 }, { "epoch": 0.4846813615771662, "grad_norm": 1.761330008506775, "learning_rate": 0.00010980279620367511, "loss": 1.4908, "step": 13534 }, { "epoch": 0.48471717370673445, "grad_norm": 1.1783751249313354, "learning_rate": 0.00010979125298960453, "loss": 1.5007, "step": 13535 }, { "epoch": 0.4847529858363028, "grad_norm": 1.9154187440872192, "learning_rate": 0.00010977970964380526, "loss": 1.528, "step": 13536 }, { "epoch": 0.48478879796587104, "grad_norm": 1.502217411994934, "learning_rate": 0.00010976816616643262, "loss": 1.37, "step": 13537 }, { "epoch": 0.4848246100954393, "grad_norm": 1.479232907295227, "learning_rate": 0.0001097566225576419, "loss": 1.4488, "step": 13538 }, { "epoch": 0.48486042222500764, "grad_norm": 1.262938380241394, "learning_rate": 0.00010974507881758842, "loss": 1.6514, "step": 13539 }, { "epoch": 0.4848962343545759, "grad_norm": 1.8512773513793945, "learning_rate": 0.00010973353494642745, "loss": 1.5816, "step": 13540 }, { "epoch": 0.4849320464841442, "grad_norm": 1.9465118646621704, "learning_rate": 0.00010972199094431435, "loss": 1.7751, "step": 13541 }, { "epoch": 0.48496785861371244, "grad_norm": 1.3644713163375854, "learning_rate": 0.00010971044681140437, "loss": 1.7066, "step": 13542 }, { "epoch": 0.48500367074328077, "grad_norm": 1.4403965473175049, "learning_rate": 0.0001096989025478529, "loss": 1.6519, "step": 13543 }, { "epoch": 0.48503948287284904, "grad_norm": 1.8270875215530396, "learning_rate": 0.0001096873581538152, "loss": 1.712, "step": 13544 }, { "epoch": 0.4850752950024173, "grad_norm": 1.5152555704116821, "learning_rate": 0.00010967581362944654, "loss": 1.6795, "step": 13545 }, { "epoch": 0.48511110713198563, "grad_norm": 1.2786191701889038, "learning_rate": 0.00010966426897490234, "loss": 1.5847, "step": 13546 }, { "epoch": 0.4851469192615539, "grad_norm": 1.5491951704025269, "learning_rate": 0.00010965272419033782, "loss": 1.3634, "step": 13547 }, { "epoch": 0.48518273139112217, "grad_norm": 2.059805393218994, "learning_rate": 0.0001096411792759084, "loss": 1.2069, "step": 13548 }, { "epoch": 0.48521854352069044, "grad_norm": 1.5590767860412598, "learning_rate": 0.0001096296342317693, "loss": 1.3519, "step": 13549 }, { "epoch": 0.48525435565025876, "grad_norm": 1.7230417728424072, "learning_rate": 0.00010961808905807593, "loss": 1.4459, "step": 13550 }, { "epoch": 0.48529016777982703, "grad_norm": 1.9240474700927734, "learning_rate": 0.00010960654375498357, "loss": 1.6497, "step": 13551 }, { "epoch": 0.4853259799093953, "grad_norm": 1.6086010932922363, "learning_rate": 0.00010959499832264754, "loss": 1.5178, "step": 13552 }, { "epoch": 0.4853617920389636, "grad_norm": 1.5947424173355103, "learning_rate": 0.00010958345276122322, "loss": 1.203, "step": 13553 }, { "epoch": 0.4853976041685319, "grad_norm": 1.5040249824523926, "learning_rate": 0.00010957190707086586, "loss": 1.3151, "step": 13554 }, { "epoch": 0.48543341629810016, "grad_norm": 1.2605984210968018, "learning_rate": 0.00010956036125173088, "loss": 1.5936, "step": 13555 }, { "epoch": 0.48546922842766843, "grad_norm": 1.7355071306228638, "learning_rate": 0.00010954881530397352, "loss": 1.5246, "step": 13556 }, { "epoch": 0.48550504055723676, "grad_norm": 1.455668568611145, "learning_rate": 0.0001095372692277492, "loss": 1.3897, "step": 13557 }, { "epoch": 0.485540852686805, "grad_norm": 1.6682887077331543, "learning_rate": 0.00010952572302321322, "loss": 1.4213, "step": 13558 }, { "epoch": 0.4855766648163733, "grad_norm": 1.7977643013000488, "learning_rate": 0.00010951417669052093, "loss": 1.573, "step": 13559 }, { "epoch": 0.4856124769459416, "grad_norm": 1.5340325832366943, "learning_rate": 0.00010950263022982766, "loss": 1.6457, "step": 13560 }, { "epoch": 0.4856482890755099, "grad_norm": 2.6738224029541016, "learning_rate": 0.0001094910836412888, "loss": 1.4636, "step": 13561 }, { "epoch": 0.48568410120507816, "grad_norm": 1.7948660850524902, "learning_rate": 0.00010947953692505959, "loss": 1.7762, "step": 13562 }, { "epoch": 0.4857199133346464, "grad_norm": 1.6173733472824097, "learning_rate": 0.00010946799008129547, "loss": 1.169, "step": 13563 }, { "epoch": 0.48575572546421475, "grad_norm": 2.0082993507385254, "learning_rate": 0.00010945644311015172, "loss": 1.8853, "step": 13564 }, { "epoch": 0.485791537593783, "grad_norm": 2.996189832687378, "learning_rate": 0.00010944489601178373, "loss": 1.7372, "step": 13565 }, { "epoch": 0.4858273497233513, "grad_norm": 1.859113097190857, "learning_rate": 0.0001094333487863469, "loss": 1.6007, "step": 13566 }, { "epoch": 0.48586316185291956, "grad_norm": 1.7060550451278687, "learning_rate": 0.00010942180143399647, "loss": 1.445, "step": 13567 }, { "epoch": 0.4858989739824879, "grad_norm": 2.118669271469116, "learning_rate": 0.0001094102539548879, "loss": 1.4383, "step": 13568 }, { "epoch": 0.48593478611205615, "grad_norm": 2.2744529247283936, "learning_rate": 0.00010939870634917647, "loss": 1.8303, "step": 13569 }, { "epoch": 0.4859705982416244, "grad_norm": 1.9283888339996338, "learning_rate": 0.00010938715861701762, "loss": 1.7504, "step": 13570 }, { "epoch": 0.48600641037119274, "grad_norm": 1.3578182458877563, "learning_rate": 0.00010937561075856662, "loss": 1.5591, "step": 13571 }, { "epoch": 0.486042222500761, "grad_norm": 1.8386645317077637, "learning_rate": 0.00010936406277397888, "loss": 1.5983, "step": 13572 }, { "epoch": 0.4860780346303293, "grad_norm": 1.6330554485321045, "learning_rate": 0.00010935251466340973, "loss": 1.4696, "step": 13573 }, { "epoch": 0.48611384675989755, "grad_norm": 1.2304561138153076, "learning_rate": 0.0001093409664270146, "loss": 1.3601, "step": 13574 }, { "epoch": 0.4861496588894659, "grad_norm": 1.8582013845443726, "learning_rate": 0.00010932941806494876, "loss": 1.6357, "step": 13575 }, { "epoch": 0.48618547101903414, "grad_norm": 1.4908101558685303, "learning_rate": 0.0001093178695773677, "loss": 1.2766, "step": 13576 }, { "epoch": 0.4862212831486024, "grad_norm": 1.5224658250808716, "learning_rate": 0.00010930632096442665, "loss": 1.6613, "step": 13577 }, { "epoch": 0.48625709527817074, "grad_norm": 1.5885058641433716, "learning_rate": 0.00010929477222628113, "loss": 1.4425, "step": 13578 }, { "epoch": 0.486292907407739, "grad_norm": 1.9274603128433228, "learning_rate": 0.00010928322336308641, "loss": 1.5363, "step": 13579 }, { "epoch": 0.4863287195373073, "grad_norm": 1.4560315608978271, "learning_rate": 0.00010927167437499788, "loss": 1.6627, "step": 13580 }, { "epoch": 0.48636453166687554, "grad_norm": 1.4085999727249146, "learning_rate": 0.00010926012526217095, "loss": 1.3984, "step": 13581 }, { "epoch": 0.48640034379644387, "grad_norm": 1.5395698547363281, "learning_rate": 0.00010924857602476095, "loss": 1.3167, "step": 13582 }, { "epoch": 0.48643615592601214, "grad_norm": 1.8653262853622437, "learning_rate": 0.00010923702666292333, "loss": 1.6573, "step": 13583 }, { "epoch": 0.4864719680555804, "grad_norm": 2.26827073097229, "learning_rate": 0.0001092254771768134, "loss": 1.6199, "step": 13584 }, { "epoch": 0.48650778018514873, "grad_norm": 1.4036325216293335, "learning_rate": 0.0001092139275665866, "loss": 1.6415, "step": 13585 }, { "epoch": 0.486543592314717, "grad_norm": 1.6470149755477905, "learning_rate": 0.00010920237783239824, "loss": 1.3443, "step": 13586 }, { "epoch": 0.48657940444428527, "grad_norm": 1.6262797117233276, "learning_rate": 0.0001091908279744038, "loss": 1.4092, "step": 13587 }, { "epoch": 0.48661521657385354, "grad_norm": 1.7407779693603516, "learning_rate": 0.00010917927799275865, "loss": 1.4334, "step": 13588 }, { "epoch": 0.48665102870342186, "grad_norm": 2.5387730598449707, "learning_rate": 0.00010916772788761809, "loss": 1.5939, "step": 13589 }, { "epoch": 0.48668684083299013, "grad_norm": 2.3920860290527344, "learning_rate": 0.00010915617765913761, "loss": 1.4822, "step": 13590 }, { "epoch": 0.4867226529625584, "grad_norm": 2.5543646812438965, "learning_rate": 0.00010914462730747257, "loss": 1.477, "step": 13591 }, { "epoch": 0.4867584650921267, "grad_norm": 1.4672547578811646, "learning_rate": 0.00010913307683277838, "loss": 1.4579, "step": 13592 }, { "epoch": 0.486794277221695, "grad_norm": 2.104875326156616, "learning_rate": 0.0001091215262352104, "loss": 1.5106, "step": 13593 }, { "epoch": 0.48683008935126326, "grad_norm": 1.5510029792785645, "learning_rate": 0.00010910997551492405, "loss": 1.319, "step": 13594 }, { "epoch": 0.48686590148083153, "grad_norm": 1.9528536796569824, "learning_rate": 0.00010909842467207472, "loss": 1.5796, "step": 13595 }, { "epoch": 0.48690171361039986, "grad_norm": 3.5056748390197754, "learning_rate": 0.00010908687370681785, "loss": 1.6121, "step": 13596 }, { "epoch": 0.4869375257399681, "grad_norm": 2.442275047302246, "learning_rate": 0.00010907532261930881, "loss": 1.5149, "step": 13597 }, { "epoch": 0.4869733378695364, "grad_norm": 2.3697030544281006, "learning_rate": 0.00010906377140970301, "loss": 1.6489, "step": 13598 }, { "epoch": 0.4870091499991047, "grad_norm": 2.148909568786621, "learning_rate": 0.00010905222007815585, "loss": 1.3923, "step": 13599 }, { "epoch": 0.487044962128673, "grad_norm": 2.649132490158081, "learning_rate": 0.00010904066862482274, "loss": 1.5161, "step": 13600 }, { "epoch": 0.48708077425824126, "grad_norm": 1.370690941810608, "learning_rate": 0.00010902911704985912, "loss": 1.5841, "step": 13601 }, { "epoch": 0.4871165863878095, "grad_norm": 2.150310516357422, "learning_rate": 0.00010901756535342033, "loss": 1.7005, "step": 13602 }, { "epoch": 0.48715239851737785, "grad_norm": 1.4429428577423096, "learning_rate": 0.00010900601353566188, "loss": 1.5266, "step": 13603 }, { "epoch": 0.4871882106469461, "grad_norm": 1.5756672620773315, "learning_rate": 0.0001089944615967391, "loss": 1.3184, "step": 13604 }, { "epoch": 0.4872240227765144, "grad_norm": 1.7169619798660278, "learning_rate": 0.0001089829095368075, "loss": 1.4519, "step": 13605 }, { "epoch": 0.4872598349060827, "grad_norm": 1.8188672065734863, "learning_rate": 0.00010897135735602238, "loss": 1.3889, "step": 13606 }, { "epoch": 0.487295647035651, "grad_norm": 1.3726422786712646, "learning_rate": 0.00010895980505453924, "loss": 1.2956, "step": 13607 }, { "epoch": 0.48733145916521925, "grad_norm": 1.986498236656189, "learning_rate": 0.00010894825263251345, "loss": 1.7822, "step": 13608 }, { "epoch": 0.4873672712947875, "grad_norm": 1.6833266019821167, "learning_rate": 0.00010893670009010049, "loss": 1.4204, "step": 13609 }, { "epoch": 0.48740308342435584, "grad_norm": 1.488139271736145, "learning_rate": 0.00010892514742745576, "loss": 1.1151, "step": 13610 }, { "epoch": 0.4874388955539241, "grad_norm": 1.7593934535980225, "learning_rate": 0.00010891359464473468, "loss": 1.5433, "step": 13611 }, { "epoch": 0.4874747076834924, "grad_norm": 1.4429570436477661, "learning_rate": 0.00010890204174209269, "loss": 1.4472, "step": 13612 }, { "epoch": 0.4875105198130607, "grad_norm": 1.4732005596160889, "learning_rate": 0.00010889048871968517, "loss": 1.4272, "step": 13613 }, { "epoch": 0.487546331942629, "grad_norm": 2.193288564682007, "learning_rate": 0.00010887893557766766, "loss": 1.7031, "step": 13614 }, { "epoch": 0.48758214407219724, "grad_norm": 1.877280592918396, "learning_rate": 0.00010886738231619549, "loss": 1.6894, "step": 13615 }, { "epoch": 0.4876179562017655, "grad_norm": 1.6814889907836914, "learning_rate": 0.00010885582893542411, "loss": 1.5371, "step": 13616 }, { "epoch": 0.48765376833133384, "grad_norm": 1.7409570217132568, "learning_rate": 0.00010884427543550899, "loss": 1.8072, "step": 13617 }, { "epoch": 0.4876895804609021, "grad_norm": 1.5650906562805176, "learning_rate": 0.00010883272181660558, "loss": 1.2065, "step": 13618 }, { "epoch": 0.4877253925904704, "grad_norm": 1.6816296577453613, "learning_rate": 0.00010882116807886924, "loss": 1.491, "step": 13619 }, { "epoch": 0.4877612047200387, "grad_norm": 1.757469892501831, "learning_rate": 0.0001088096142224555, "loss": 1.5318, "step": 13620 }, { "epoch": 0.48779701684960697, "grad_norm": 1.3962442874908447, "learning_rate": 0.00010879806024751975, "loss": 1.3877, "step": 13621 }, { "epoch": 0.48783282897917524, "grad_norm": 2.594102144241333, "learning_rate": 0.00010878650615421744, "loss": 1.4608, "step": 13622 }, { "epoch": 0.4878686411087435, "grad_norm": 1.906245231628418, "learning_rate": 0.00010877495194270407, "loss": 1.5072, "step": 13623 }, { "epoch": 0.48790445323831183, "grad_norm": 1.6580541133880615, "learning_rate": 0.00010876339761313499, "loss": 1.4024, "step": 13624 }, { "epoch": 0.4879402653678801, "grad_norm": 1.6919373273849487, "learning_rate": 0.00010875184316566571, "loss": 1.5576, "step": 13625 }, { "epoch": 0.48797607749744837, "grad_norm": 1.9276906251907349, "learning_rate": 0.00010874028860045166, "loss": 1.4877, "step": 13626 }, { "epoch": 0.4880118896270167, "grad_norm": 1.4505412578582764, "learning_rate": 0.00010872873391764833, "loss": 1.4795, "step": 13627 }, { "epoch": 0.48804770175658496, "grad_norm": 1.4646049737930298, "learning_rate": 0.00010871717911741113, "loss": 1.3365, "step": 13628 }, { "epoch": 0.48808351388615323, "grad_norm": 1.593316674232483, "learning_rate": 0.00010870562419989552, "loss": 1.4228, "step": 13629 }, { "epoch": 0.4881193260157215, "grad_norm": 1.555849552154541, "learning_rate": 0.00010869406916525698, "loss": 1.5343, "step": 13630 }, { "epoch": 0.4881551381452898, "grad_norm": 2.0024313926696777, "learning_rate": 0.00010868251401365095, "loss": 1.4696, "step": 13631 }, { "epoch": 0.4881909502748581, "grad_norm": 1.810181736946106, "learning_rate": 0.0001086709587452329, "loss": 1.3037, "step": 13632 }, { "epoch": 0.48822676240442636, "grad_norm": 2.0608158111572266, "learning_rate": 0.00010865940336015828, "loss": 1.5016, "step": 13633 }, { "epoch": 0.4882625745339947, "grad_norm": 1.37624192237854, "learning_rate": 0.00010864784785858256, "loss": 1.296, "step": 13634 }, { "epoch": 0.48829838666356296, "grad_norm": 1.3577290773391724, "learning_rate": 0.00010863629224066116, "loss": 1.3755, "step": 13635 }, { "epoch": 0.4883341987931312, "grad_norm": 1.884279727935791, "learning_rate": 0.00010862473650654965, "loss": 1.6172, "step": 13636 }, { "epoch": 0.4883700109226995, "grad_norm": 1.4926834106445312, "learning_rate": 0.00010861318065640338, "loss": 1.3022, "step": 13637 }, { "epoch": 0.4884058230522678, "grad_norm": 3.462437629699707, "learning_rate": 0.00010860162469037792, "loss": 1.6107, "step": 13638 }, { "epoch": 0.4884416351818361, "grad_norm": 2.0196216106414795, "learning_rate": 0.00010859006860862865, "loss": 1.5062, "step": 13639 }, { "epoch": 0.48847744731140436, "grad_norm": 1.4165526628494263, "learning_rate": 0.00010857851241131114, "loss": 1.389, "step": 13640 }, { "epoch": 0.4885132594409727, "grad_norm": 1.861929178237915, "learning_rate": 0.0001085669560985808, "loss": 1.6056, "step": 13641 }, { "epoch": 0.48854907157054095, "grad_norm": 2.1622304916381836, "learning_rate": 0.0001085553996705931, "loss": 1.9087, "step": 13642 }, { "epoch": 0.4885848837001092, "grad_norm": 1.8672003746032715, "learning_rate": 0.00010854384312750354, "loss": 1.3609, "step": 13643 }, { "epoch": 0.4886206958296775, "grad_norm": 2.506395101547241, "learning_rate": 0.00010853228646946758, "loss": 1.2844, "step": 13644 }, { "epoch": 0.4886565079592458, "grad_norm": 1.754264235496521, "learning_rate": 0.00010852072969664073, "loss": 1.551, "step": 13645 }, { "epoch": 0.4886923200888141, "grad_norm": 1.666245698928833, "learning_rate": 0.00010850917280917843, "loss": 1.0954, "step": 13646 }, { "epoch": 0.48872813221838235, "grad_norm": 1.6731163263320923, "learning_rate": 0.0001084976158072362, "loss": 1.7465, "step": 13647 }, { "epoch": 0.4887639443479507, "grad_norm": 1.6194301843643188, "learning_rate": 0.0001084860586909695, "loss": 1.1302, "step": 13648 }, { "epoch": 0.48879975647751894, "grad_norm": 1.5961239337921143, "learning_rate": 0.00010847450146053386, "loss": 1.3433, "step": 13649 }, { "epoch": 0.4888355686070872, "grad_norm": 1.8925201892852783, "learning_rate": 0.0001084629441160847, "loss": 1.5822, "step": 13650 }, { "epoch": 0.4888713807366555, "grad_norm": 1.876792550086975, "learning_rate": 0.00010845138665777754, "loss": 1.7623, "step": 13651 }, { "epoch": 0.4889071928662238, "grad_norm": 1.422800898551941, "learning_rate": 0.0001084398290857679, "loss": 1.3395, "step": 13652 }, { "epoch": 0.4889430049957921, "grad_norm": 2.462639808654785, "learning_rate": 0.00010842827140021121, "loss": 1.5845, "step": 13653 }, { "epoch": 0.48897881712536034, "grad_norm": 1.4146637916564941, "learning_rate": 0.00010841671360126304, "loss": 1.4434, "step": 13654 }, { "epoch": 0.48901462925492867, "grad_norm": 1.4680838584899902, "learning_rate": 0.0001084051556890788, "loss": 1.5377, "step": 13655 }, { "epoch": 0.48905044138449694, "grad_norm": 1.9159295558929443, "learning_rate": 0.0001083935976638141, "loss": 1.5379, "step": 13656 }, { "epoch": 0.4890862535140652, "grad_norm": 1.6047742366790771, "learning_rate": 0.00010838203952562432, "loss": 1.4923, "step": 13657 }, { "epoch": 0.4891220656436335, "grad_norm": 1.7296887636184692, "learning_rate": 0.00010837048127466505, "loss": 1.2314, "step": 13658 }, { "epoch": 0.4891578777732018, "grad_norm": 1.9508219957351685, "learning_rate": 0.00010835892291109169, "loss": 1.4367, "step": 13659 }, { "epoch": 0.48919368990277007, "grad_norm": 1.9842190742492676, "learning_rate": 0.00010834736443505986, "loss": 1.613, "step": 13660 }, { "epoch": 0.48922950203233834, "grad_norm": 1.6061060428619385, "learning_rate": 0.00010833580584672496, "loss": 1.2524, "step": 13661 }, { "epoch": 0.48926531416190666, "grad_norm": 1.3053823709487915, "learning_rate": 0.00010832424714624259, "loss": 1.4031, "step": 13662 }, { "epoch": 0.48930112629147493, "grad_norm": 1.4019272327423096, "learning_rate": 0.00010831268833376817, "loss": 1.3219, "step": 13663 }, { "epoch": 0.4893369384210432, "grad_norm": 1.8044565916061401, "learning_rate": 0.00010830112940945726, "loss": 1.5856, "step": 13664 }, { "epoch": 0.48937275055061147, "grad_norm": 2.211778163909912, "learning_rate": 0.00010828957037346538, "loss": 1.3458, "step": 13665 }, { "epoch": 0.4894085626801798, "grad_norm": 1.8323140144348145, "learning_rate": 0.00010827801122594802, "loss": 1.2703, "step": 13666 }, { "epoch": 0.48944437480974806, "grad_norm": 1.6370885372161865, "learning_rate": 0.00010826645196706074, "loss": 1.4509, "step": 13667 }, { "epoch": 0.48948018693931633, "grad_norm": 1.6698061227798462, "learning_rate": 0.00010825489259695894, "loss": 1.5467, "step": 13668 }, { "epoch": 0.48951599906888466, "grad_norm": 1.5696372985839844, "learning_rate": 0.00010824333311579824, "loss": 1.6898, "step": 13669 }, { "epoch": 0.4895518111984529, "grad_norm": 2.0624501705169678, "learning_rate": 0.00010823177352373412, "loss": 1.5877, "step": 13670 }, { "epoch": 0.4895876233280212, "grad_norm": 1.8093184232711792, "learning_rate": 0.00010822021382092211, "loss": 1.575, "step": 13671 }, { "epoch": 0.48962343545758946, "grad_norm": 1.586574912071228, "learning_rate": 0.00010820865400751772, "loss": 1.5977, "step": 13672 }, { "epoch": 0.4896592475871578, "grad_norm": 2.6463475227355957, "learning_rate": 0.00010819709408367649, "loss": 1.5361, "step": 13673 }, { "epoch": 0.48969505971672606, "grad_norm": 1.738493800163269, "learning_rate": 0.00010818553404955391, "loss": 1.6058, "step": 13674 }, { "epoch": 0.4897308718462943, "grad_norm": 1.6610356569290161, "learning_rate": 0.00010817397390530555, "loss": 1.608, "step": 13675 }, { "epoch": 0.48976668397586265, "grad_norm": 1.5615880489349365, "learning_rate": 0.00010816241365108692, "loss": 1.6981, "step": 13676 }, { "epoch": 0.4898024961054309, "grad_norm": 1.5565402507781982, "learning_rate": 0.00010815085328705352, "loss": 1.3005, "step": 13677 }, { "epoch": 0.4898383082349992, "grad_norm": 1.6391171216964722, "learning_rate": 0.00010813929281336092, "loss": 1.4786, "step": 13678 }, { "epoch": 0.48987412036456746, "grad_norm": 1.5482856035232544, "learning_rate": 0.00010812773223016461, "loss": 1.4095, "step": 13679 }, { "epoch": 0.4899099324941358, "grad_norm": 1.6968822479248047, "learning_rate": 0.00010811617153762017, "loss": 1.5978, "step": 13680 }, { "epoch": 0.48994574462370405, "grad_norm": 1.4903837442398071, "learning_rate": 0.0001081046107358831, "loss": 1.3526, "step": 13681 }, { "epoch": 0.4899815567532723, "grad_norm": 1.5468765497207642, "learning_rate": 0.00010809304982510897, "loss": 1.487, "step": 13682 }, { "epoch": 0.49001736888284064, "grad_norm": 2.329375743865967, "learning_rate": 0.00010808148880545325, "loss": 1.346, "step": 13683 }, { "epoch": 0.4900531810124089, "grad_norm": 1.809410810470581, "learning_rate": 0.00010806992767707155, "loss": 1.8127, "step": 13684 }, { "epoch": 0.4900889931419772, "grad_norm": 2.0929079055786133, "learning_rate": 0.00010805836644011939, "loss": 1.697, "step": 13685 }, { "epoch": 0.49012480527154545, "grad_norm": 1.9662256240844727, "learning_rate": 0.00010804680509475229, "loss": 1.3781, "step": 13686 }, { "epoch": 0.4901606174011138, "grad_norm": 1.429788589477539, "learning_rate": 0.00010803524364112583, "loss": 1.3815, "step": 13687 }, { "epoch": 0.49019642953068204, "grad_norm": 1.752447247505188, "learning_rate": 0.0001080236820793955, "loss": 1.3346, "step": 13688 }, { "epoch": 0.4902322416602503, "grad_norm": 1.527882695198059, "learning_rate": 0.00010801212040971691, "loss": 1.5413, "step": 13689 }, { "epoch": 0.49026805378981864, "grad_norm": 1.3394904136657715, "learning_rate": 0.00010800055863224555, "loss": 1.4344, "step": 13690 }, { "epoch": 0.4903038659193869, "grad_norm": 1.955166220664978, "learning_rate": 0.00010798899674713699, "loss": 1.4497, "step": 13691 }, { "epoch": 0.4903396780489552, "grad_norm": 1.8119986057281494, "learning_rate": 0.00010797743475454678, "loss": 1.7094, "step": 13692 }, { "epoch": 0.49037549017852344, "grad_norm": 1.3964793682098389, "learning_rate": 0.0001079658726546305, "loss": 1.4808, "step": 13693 }, { "epoch": 0.49041130230809177, "grad_norm": 2.0613391399383545, "learning_rate": 0.00010795431044754367, "loss": 1.2227, "step": 13694 }, { "epoch": 0.49044711443766004, "grad_norm": 1.5445024967193604, "learning_rate": 0.00010794274813344185, "loss": 1.5322, "step": 13695 }, { "epoch": 0.4904829265672283, "grad_norm": 1.8224143981933594, "learning_rate": 0.0001079311857124806, "loss": 1.3995, "step": 13696 }, { "epoch": 0.49051873869679663, "grad_norm": 2.142685890197754, "learning_rate": 0.00010791962318481547, "loss": 1.7053, "step": 13697 }, { "epoch": 0.4905545508263649, "grad_norm": 1.9301700592041016, "learning_rate": 0.00010790806055060205, "loss": 1.7767, "step": 13698 }, { "epoch": 0.49059036295593317, "grad_norm": 2.0079996585845947, "learning_rate": 0.00010789649780999585, "loss": 1.6479, "step": 13699 }, { "epoch": 0.49062617508550144, "grad_norm": 1.3682293891906738, "learning_rate": 0.00010788493496315246, "loss": 1.2197, "step": 13700 }, { "epoch": 0.49066198721506976, "grad_norm": 1.5623469352722168, "learning_rate": 0.00010787337201022745, "loss": 1.6502, "step": 13701 }, { "epoch": 0.49069779934463803, "grad_norm": 1.6920688152313232, "learning_rate": 0.00010786180895137639, "loss": 1.2194, "step": 13702 }, { "epoch": 0.4907336114742063, "grad_norm": 1.5415152311325073, "learning_rate": 0.0001078502457867548, "loss": 1.4212, "step": 13703 }, { "epoch": 0.4907694236037746, "grad_norm": 1.6158078908920288, "learning_rate": 0.00010783868251651833, "loss": 1.3743, "step": 13704 }, { "epoch": 0.4908052357333429, "grad_norm": 1.6433581113815308, "learning_rate": 0.00010782711914082242, "loss": 1.4155, "step": 13705 }, { "epoch": 0.49084104786291116, "grad_norm": 1.4370219707489014, "learning_rate": 0.00010781555565982276, "loss": 1.429, "step": 13706 }, { "epoch": 0.49087685999247943, "grad_norm": 1.5507996082305908, "learning_rate": 0.00010780399207367489, "loss": 1.1886, "step": 13707 }, { "epoch": 0.49091267212204776, "grad_norm": 1.837388515472412, "learning_rate": 0.00010779242838253433, "loss": 1.5502, "step": 13708 }, { "epoch": 0.490948484251616, "grad_norm": 1.5387765169143677, "learning_rate": 0.00010778086458655677, "loss": 1.5225, "step": 13709 }, { "epoch": 0.4909842963811843, "grad_norm": 2.5038530826568604, "learning_rate": 0.00010776930068589764, "loss": 1.6695, "step": 13710 }, { "epoch": 0.4910201085107526, "grad_norm": 2.6884608268737793, "learning_rate": 0.00010775773668071265, "loss": 1.4632, "step": 13711 }, { "epoch": 0.4910559206403209, "grad_norm": 1.9262381792068481, "learning_rate": 0.00010774617257115728, "loss": 1.0545, "step": 13712 }, { "epoch": 0.49109173276988916, "grad_norm": 1.8971209526062012, "learning_rate": 0.00010773460835738718, "loss": 1.4246, "step": 13713 }, { "epoch": 0.4911275448994574, "grad_norm": 1.7927980422973633, "learning_rate": 0.00010772304403955789, "loss": 1.4387, "step": 13714 }, { "epoch": 0.49116335702902575, "grad_norm": 1.5163729190826416, "learning_rate": 0.000107711479617825, "loss": 1.3497, "step": 13715 }, { "epoch": 0.491199169158594, "grad_norm": 1.4729920625686646, "learning_rate": 0.00010769991509234408, "loss": 1.6522, "step": 13716 }, { "epoch": 0.4912349812881623, "grad_norm": 1.7279658317565918, "learning_rate": 0.00010768835046327077, "loss": 1.1325, "step": 13717 }, { "epoch": 0.4912707934177306, "grad_norm": 1.7967742681503296, "learning_rate": 0.00010767678573076058, "loss": 1.6004, "step": 13718 }, { "epoch": 0.4913066055472989, "grad_norm": 2.122061252593994, "learning_rate": 0.00010766522089496915, "loss": 1.5491, "step": 13719 }, { "epoch": 0.49134241767686715, "grad_norm": 1.501920461654663, "learning_rate": 0.00010765365595605212, "loss": 1.5364, "step": 13720 }, { "epoch": 0.4913782298064354, "grad_norm": 1.8357949256896973, "learning_rate": 0.00010764209091416497, "loss": 1.5825, "step": 13721 }, { "epoch": 0.49141404193600374, "grad_norm": 1.3038849830627441, "learning_rate": 0.00010763052576946335, "loss": 1.5351, "step": 13722 }, { "epoch": 0.491449854065572, "grad_norm": 2.2974419593811035, "learning_rate": 0.00010761896052210285, "loss": 1.3127, "step": 13723 }, { "epoch": 0.4914856661951403, "grad_norm": 1.4514243602752686, "learning_rate": 0.00010760739517223908, "loss": 1.3836, "step": 13724 }, { "epoch": 0.4915214783247086, "grad_norm": 2.4426000118255615, "learning_rate": 0.00010759582972002758, "loss": 1.7502, "step": 13725 }, { "epoch": 0.4915572904542769, "grad_norm": 1.43705153465271, "learning_rate": 0.00010758426416562402, "loss": 1.6914, "step": 13726 }, { "epoch": 0.49159310258384514, "grad_norm": 2.030698776245117, "learning_rate": 0.00010757269850918394, "loss": 1.3523, "step": 13727 }, { "epoch": 0.4916289147134134, "grad_norm": 2.493643045425415, "learning_rate": 0.00010756113275086302, "loss": 1.5557, "step": 13728 }, { "epoch": 0.49166472684298174, "grad_norm": 2.235466241836548, "learning_rate": 0.00010754956689081678, "loss": 1.7038, "step": 13729 }, { "epoch": 0.49170053897255, "grad_norm": 1.6229029893875122, "learning_rate": 0.00010753800092920086, "loss": 1.5441, "step": 13730 }, { "epoch": 0.4917363511021183, "grad_norm": 1.7201504707336426, "learning_rate": 0.00010752643486617086, "loss": 1.6628, "step": 13731 }, { "epoch": 0.4917721632316866, "grad_norm": 2.171365737915039, "learning_rate": 0.00010751486870188239, "loss": 1.4995, "step": 13732 }, { "epoch": 0.49180797536125487, "grad_norm": 2.607131004333496, "learning_rate": 0.00010750330243649104, "loss": 1.6046, "step": 13733 }, { "epoch": 0.49184378749082314, "grad_norm": 2.4226737022399902, "learning_rate": 0.00010749173607015247, "loss": 1.4492, "step": 13734 }, { "epoch": 0.4918795996203914, "grad_norm": 1.5893970727920532, "learning_rate": 0.00010748016960302223, "loss": 1.403, "step": 13735 }, { "epoch": 0.49191541174995973, "grad_norm": 1.5359864234924316, "learning_rate": 0.00010746860303525595, "loss": 1.6285, "step": 13736 }, { "epoch": 0.491951223879528, "grad_norm": 2.033862829208374, "learning_rate": 0.00010745703636700926, "loss": 1.6867, "step": 13737 }, { "epoch": 0.49198703600909627, "grad_norm": 1.6129465103149414, "learning_rate": 0.00010744546959843777, "loss": 1.6712, "step": 13738 }, { "epoch": 0.4920228481386646, "grad_norm": 1.5458647012710571, "learning_rate": 0.00010743390272969706, "loss": 1.4477, "step": 13739 }, { "epoch": 0.49205866026823286, "grad_norm": 1.4577420949935913, "learning_rate": 0.00010742233576094283, "loss": 1.3451, "step": 13740 }, { "epoch": 0.49209447239780113, "grad_norm": 2.2439849376678467, "learning_rate": 0.0001074107686923306, "loss": 1.5242, "step": 13741 }, { "epoch": 0.4921302845273694, "grad_norm": 1.4966247081756592, "learning_rate": 0.00010739920152401605, "loss": 1.8109, "step": 13742 }, { "epoch": 0.4921660966569377, "grad_norm": 1.5827927589416504, "learning_rate": 0.00010738763425615479, "loss": 1.2532, "step": 13743 }, { "epoch": 0.492201908786506, "grad_norm": 1.451015591621399, "learning_rate": 0.00010737606688890245, "loss": 1.5968, "step": 13744 }, { "epoch": 0.49223772091607426, "grad_norm": 1.5125499963760376, "learning_rate": 0.00010736449942241465, "loss": 1.3855, "step": 13745 }, { "epoch": 0.4922735330456426, "grad_norm": 1.5985307693481445, "learning_rate": 0.000107352931856847, "loss": 1.4018, "step": 13746 }, { "epoch": 0.49230934517521086, "grad_norm": 1.6459596157073975, "learning_rate": 0.00010734136419235512, "loss": 1.4337, "step": 13747 }, { "epoch": 0.4923451573047791, "grad_norm": 2.634399652481079, "learning_rate": 0.00010732979642909466, "loss": 1.4944, "step": 13748 }, { "epoch": 0.4923809694343474, "grad_norm": 1.453285574913025, "learning_rate": 0.00010731822856722127, "loss": 1.595, "step": 13749 }, { "epoch": 0.4924167815639157, "grad_norm": 2.0285205841064453, "learning_rate": 0.00010730666060689053, "loss": 1.8837, "step": 13750 }, { "epoch": 0.492452593693484, "grad_norm": 1.861598253250122, "learning_rate": 0.00010729509254825811, "loss": 1.6692, "step": 13751 }, { "epoch": 0.49248840582305226, "grad_norm": 2.062983751296997, "learning_rate": 0.00010728352439147959, "loss": 1.6496, "step": 13752 }, { "epoch": 0.4925242179526206, "grad_norm": 2.475240707397461, "learning_rate": 0.00010727195613671071, "loss": 1.6116, "step": 13753 }, { "epoch": 0.49256003008218885, "grad_norm": 1.5981770753860474, "learning_rate": 0.00010726038778410699, "loss": 1.3383, "step": 13754 }, { "epoch": 0.4925958422117571, "grad_norm": 1.3601890802383423, "learning_rate": 0.00010724881933382416, "loss": 1.55, "step": 13755 }, { "epoch": 0.4926316543413254, "grad_norm": 1.8555890321731567, "learning_rate": 0.00010723725078601778, "loss": 1.701, "step": 13756 }, { "epoch": 0.4926674664708937, "grad_norm": 1.4763880968093872, "learning_rate": 0.00010722568214084354, "loss": 1.7419, "step": 13757 }, { "epoch": 0.492703278600462, "grad_norm": 1.3820867538452148, "learning_rate": 0.00010721411339845707, "loss": 1.4405, "step": 13758 }, { "epoch": 0.49273909073003025, "grad_norm": 1.5505809783935547, "learning_rate": 0.00010720254455901399, "loss": 1.3917, "step": 13759 }, { "epoch": 0.4927749028595986, "grad_norm": 3.055396556854248, "learning_rate": 0.00010719097562266998, "loss": 1.2911, "step": 13760 }, { "epoch": 0.49281071498916684, "grad_norm": 1.4654674530029297, "learning_rate": 0.00010717940658958066, "loss": 1.5803, "step": 13761 }, { "epoch": 0.4928465271187351, "grad_norm": 1.8840539455413818, "learning_rate": 0.00010716783745990169, "loss": 1.2817, "step": 13762 }, { "epoch": 0.4928823392483034, "grad_norm": 1.674445390701294, "learning_rate": 0.0001071562682337887, "loss": 1.5135, "step": 13763 }, { "epoch": 0.4929181513778717, "grad_norm": 1.557632327079773, "learning_rate": 0.0001071446989113974, "loss": 1.4121, "step": 13764 }, { "epoch": 0.49295396350744, "grad_norm": 1.402957558631897, "learning_rate": 0.00010713312949288334, "loss": 1.3646, "step": 13765 }, { "epoch": 0.49298977563700824, "grad_norm": 1.951153039932251, "learning_rate": 0.00010712155997840225, "loss": 1.468, "step": 13766 }, { "epoch": 0.4930255877665765, "grad_norm": 1.7936666011810303, "learning_rate": 0.00010710999036810975, "loss": 1.123, "step": 13767 }, { "epoch": 0.49306139989614484, "grad_norm": 1.553545594215393, "learning_rate": 0.00010709842066216151, "loss": 1.1884, "step": 13768 }, { "epoch": 0.4930972120257131, "grad_norm": 1.3447620868682861, "learning_rate": 0.00010708685086071316, "loss": 1.2112, "step": 13769 }, { "epoch": 0.4931330241552814, "grad_norm": 2.356231212615967, "learning_rate": 0.00010707528096392038, "loss": 1.3566, "step": 13770 }, { "epoch": 0.4931688362848497, "grad_norm": 1.819908618927002, "learning_rate": 0.00010706371097193881, "loss": 1.7402, "step": 13771 }, { "epoch": 0.49320464841441797, "grad_norm": 2.1119303703308105, "learning_rate": 0.00010705214088492415, "loss": 1.3566, "step": 13772 }, { "epoch": 0.49324046054398624, "grad_norm": 1.6259195804595947, "learning_rate": 0.00010704057070303201, "loss": 1.4016, "step": 13773 }, { "epoch": 0.4932762726735545, "grad_norm": 1.7888505458831787, "learning_rate": 0.00010702900042641806, "loss": 1.5483, "step": 13774 }, { "epoch": 0.49331208480312283, "grad_norm": 1.7072391510009766, "learning_rate": 0.00010701743005523801, "loss": 1.4431, "step": 13775 }, { "epoch": 0.4933478969326911, "grad_norm": 2.1504554748535156, "learning_rate": 0.00010700585958964744, "loss": 1.7045, "step": 13776 }, { "epoch": 0.49338370906225937, "grad_norm": 2.100933790206909, "learning_rate": 0.00010699428902980211, "loss": 1.7934, "step": 13777 }, { "epoch": 0.4934195211918277, "grad_norm": 1.4316816329956055, "learning_rate": 0.00010698271837585762, "loss": 1.3737, "step": 13778 }, { "epoch": 0.49345533332139596, "grad_norm": 1.4523588418960571, "learning_rate": 0.0001069711476279697, "loss": 1.533, "step": 13779 }, { "epoch": 0.49349114545096423, "grad_norm": 1.4418319463729858, "learning_rate": 0.00010695957678629391, "loss": 1.3883, "step": 13780 }, { "epoch": 0.4935269575805325, "grad_norm": 1.8043620586395264, "learning_rate": 0.00010694800585098606, "loss": 1.4568, "step": 13781 }, { "epoch": 0.4935627697101008, "grad_norm": 1.7246334552764893, "learning_rate": 0.00010693643482220173, "loss": 1.6841, "step": 13782 }, { "epoch": 0.4935985818396691, "grad_norm": 1.7109289169311523, "learning_rate": 0.0001069248637000966, "loss": 1.3743, "step": 13783 }, { "epoch": 0.49363439396923736, "grad_norm": 1.5216609239578247, "learning_rate": 0.0001069132924848264, "loss": 1.3224, "step": 13784 }, { "epoch": 0.4936702060988057, "grad_norm": 1.9945144653320312, "learning_rate": 0.00010690172117654672, "loss": 1.5012, "step": 13785 }, { "epoch": 0.49370601822837396, "grad_norm": 1.4590290784835815, "learning_rate": 0.00010689014977541332, "loss": 1.408, "step": 13786 }, { "epoch": 0.4937418303579422, "grad_norm": 1.3395782709121704, "learning_rate": 0.00010687857828158182, "loss": 1.5821, "step": 13787 }, { "epoch": 0.4937776424875105, "grad_norm": 1.3145579099655151, "learning_rate": 0.00010686700669520792, "loss": 1.5071, "step": 13788 }, { "epoch": 0.4938134546170788, "grad_norm": 1.4483972787857056, "learning_rate": 0.00010685543501644732, "loss": 1.36, "step": 13789 }, { "epoch": 0.4938492667466471, "grad_norm": 5.216784477233887, "learning_rate": 0.00010684386324545567, "loss": 1.8394, "step": 13790 }, { "epoch": 0.49388507887621536, "grad_norm": 1.2942969799041748, "learning_rate": 0.0001068322913823887, "loss": 1.1399, "step": 13791 }, { "epoch": 0.4939208910057837, "grad_norm": 1.4806745052337646, "learning_rate": 0.00010682071942740202, "loss": 1.2029, "step": 13792 }, { "epoch": 0.49395670313535195, "grad_norm": 1.8519744873046875, "learning_rate": 0.0001068091473806514, "loss": 1.3285, "step": 13793 }, { "epoch": 0.4939925152649202, "grad_norm": 1.7038651704788208, "learning_rate": 0.00010679757524229244, "loss": 1.3596, "step": 13794 }, { "epoch": 0.4940283273944885, "grad_norm": 1.638228178024292, "learning_rate": 0.0001067860030124809, "loss": 1.328, "step": 13795 }, { "epoch": 0.4940641395240568, "grad_norm": 1.7471988201141357, "learning_rate": 0.00010677443069137242, "loss": 1.2233, "step": 13796 }, { "epoch": 0.4940999516536251, "grad_norm": 2.5956523418426514, "learning_rate": 0.00010676285827912276, "loss": 1.2647, "step": 13797 }, { "epoch": 0.49413576378319335, "grad_norm": 1.6266028881072998, "learning_rate": 0.00010675128577588751, "loss": 1.6781, "step": 13798 }, { "epoch": 0.4941715759127617, "grad_norm": 1.3634744882583618, "learning_rate": 0.00010673971318182247, "loss": 1.6182, "step": 13799 }, { "epoch": 0.49420738804232994, "grad_norm": 1.7638877630233765, "learning_rate": 0.00010672814049708326, "loss": 1.5701, "step": 13800 }, { "epoch": 0.4942432001718982, "grad_norm": 1.6434555053710938, "learning_rate": 0.0001067165677218256, "loss": 1.2647, "step": 13801 }, { "epoch": 0.4942790123014665, "grad_norm": 1.9135644435882568, "learning_rate": 0.00010670499485620517, "loss": 1.5567, "step": 13802 }, { "epoch": 0.4943148244310348, "grad_norm": 1.8451272249221802, "learning_rate": 0.0001066934219003777, "loss": 1.2508, "step": 13803 }, { "epoch": 0.4943506365606031, "grad_norm": 1.3484454154968262, "learning_rate": 0.00010668184885449886, "loss": 1.4343, "step": 13804 }, { "epoch": 0.49438644869017134, "grad_norm": 1.644727110862732, "learning_rate": 0.00010667027571872436, "loss": 1.2379, "step": 13805 }, { "epoch": 0.49442226081973967, "grad_norm": 1.5474905967712402, "learning_rate": 0.00010665870249320993, "loss": 1.5323, "step": 13806 }, { "epoch": 0.49445807294930794, "grad_norm": 1.9647862911224365, "learning_rate": 0.00010664712917811121, "loss": 1.7564, "step": 13807 }, { "epoch": 0.4944938850788762, "grad_norm": 1.648308515548706, "learning_rate": 0.000106635555773584, "loss": 1.36, "step": 13808 }, { "epoch": 0.4945296972084445, "grad_norm": 1.834714651107788, "learning_rate": 0.00010662398227978389, "loss": 1.223, "step": 13809 }, { "epoch": 0.4945655093380128, "grad_norm": 1.4639192819595337, "learning_rate": 0.00010661240869686669, "loss": 1.4133, "step": 13810 }, { "epoch": 0.49460132146758107, "grad_norm": 1.9548790454864502, "learning_rate": 0.00010660083502498801, "loss": 1.2963, "step": 13811 }, { "epoch": 0.49463713359714934, "grad_norm": 1.7296000719070435, "learning_rate": 0.00010658926126430364, "loss": 1.6591, "step": 13812 }, { "epoch": 0.49467294572671766, "grad_norm": 1.7165069580078125, "learning_rate": 0.00010657768741496923, "loss": 1.3809, "step": 13813 }, { "epoch": 0.49470875785628593, "grad_norm": 1.616808295249939, "learning_rate": 0.00010656611347714056, "loss": 1.647, "step": 13814 }, { "epoch": 0.4947445699858542, "grad_norm": 2.2919797897338867, "learning_rate": 0.00010655453945097327, "loss": 1.8562, "step": 13815 }, { "epoch": 0.49478038211542247, "grad_norm": 2.0228257179260254, "learning_rate": 0.0001065429653366231, "loss": 1.5348, "step": 13816 }, { "epoch": 0.4948161942449908, "grad_norm": 2.164900064468384, "learning_rate": 0.00010653139113424581, "loss": 1.5874, "step": 13817 }, { "epoch": 0.49485200637455906, "grad_norm": 1.9388035535812378, "learning_rate": 0.00010651981684399705, "loss": 1.3361, "step": 13818 }, { "epoch": 0.49488781850412733, "grad_norm": 2.0324113368988037, "learning_rate": 0.0001065082424660326, "loss": 1.2808, "step": 13819 }, { "epoch": 0.49492363063369565, "grad_norm": 1.7220652103424072, "learning_rate": 0.00010649666800050808, "loss": 1.3884, "step": 13820 }, { "epoch": 0.4949594427632639, "grad_norm": 2.1359825134277344, "learning_rate": 0.00010648509344757933, "loss": 1.4864, "step": 13821 }, { "epoch": 0.4949952548928322, "grad_norm": 1.7690446376800537, "learning_rate": 0.00010647351880740197, "loss": 1.5202, "step": 13822 }, { "epoch": 0.49503106702240046, "grad_norm": 1.6136664152145386, "learning_rate": 0.00010646194408013179, "loss": 1.4969, "step": 13823 }, { "epoch": 0.4950668791519688, "grad_norm": 1.6835991144180298, "learning_rate": 0.00010645036926592449, "loss": 1.3849, "step": 13824 }, { "epoch": 0.49510269128153706, "grad_norm": 2.4620182514190674, "learning_rate": 0.00010643879436493578, "loss": 1.602, "step": 13825 }, { "epoch": 0.4951385034111053, "grad_norm": 2.525754928588867, "learning_rate": 0.0001064272193773214, "loss": 1.4661, "step": 13826 }, { "epoch": 0.49517431554067365, "grad_norm": 2.1498541831970215, "learning_rate": 0.00010641564430323707, "loss": 1.4336, "step": 13827 }, { "epoch": 0.4952101276702419, "grad_norm": 2.0926995277404785, "learning_rate": 0.00010640406914283854, "loss": 1.2882, "step": 13828 }, { "epoch": 0.4952459397998102, "grad_norm": 2.03147554397583, "learning_rate": 0.00010639249389628149, "loss": 1.7856, "step": 13829 }, { "epoch": 0.49528175192937846, "grad_norm": 1.6365413665771484, "learning_rate": 0.00010638091856372172, "loss": 1.3358, "step": 13830 }, { "epoch": 0.4953175640589468, "grad_norm": 1.5908212661743164, "learning_rate": 0.00010636934314531488, "loss": 1.7253, "step": 13831 }, { "epoch": 0.49535337618851505, "grad_norm": 1.616167664527893, "learning_rate": 0.00010635776764121677, "loss": 1.6732, "step": 13832 }, { "epoch": 0.4953891883180833, "grad_norm": 1.6805557012557983, "learning_rate": 0.00010634619205158307, "loss": 1.7188, "step": 13833 }, { "epoch": 0.49542500044765164, "grad_norm": 1.613982081413269, "learning_rate": 0.00010633461637656958, "loss": 1.4378, "step": 13834 }, { "epoch": 0.4954608125772199, "grad_norm": 1.827687382698059, "learning_rate": 0.00010632304061633199, "loss": 1.4883, "step": 13835 }, { "epoch": 0.4954966247067882, "grad_norm": 1.3360340595245361, "learning_rate": 0.00010631146477102602, "loss": 1.2353, "step": 13836 }, { "epoch": 0.49553243683635645, "grad_norm": 1.3688260316848755, "learning_rate": 0.00010629988884080745, "loss": 1.4722, "step": 13837 }, { "epoch": 0.4955682489659248, "grad_norm": 2.0016164779663086, "learning_rate": 0.00010628831282583201, "loss": 1.2342, "step": 13838 }, { "epoch": 0.49560406109549304, "grad_norm": 2.039696216583252, "learning_rate": 0.00010627673672625542, "loss": 1.5256, "step": 13839 }, { "epoch": 0.4956398732250613, "grad_norm": 1.414087176322937, "learning_rate": 0.00010626516054223341, "loss": 1.5765, "step": 13840 }, { "epoch": 0.49567568535462964, "grad_norm": 1.4707834720611572, "learning_rate": 0.0001062535842739218, "loss": 1.3901, "step": 13841 }, { "epoch": 0.4957114974841979, "grad_norm": 1.269789695739746, "learning_rate": 0.00010624200792147622, "loss": 1.707, "step": 13842 }, { "epoch": 0.4957473096137662, "grad_norm": 1.7977982759475708, "learning_rate": 0.00010623043148505254, "loss": 1.3614, "step": 13843 }, { "epoch": 0.49578312174333444, "grad_norm": 2.498763084411621, "learning_rate": 0.00010621885496480641, "loss": 1.4808, "step": 13844 }, { "epoch": 0.49581893387290277, "grad_norm": 1.9062105417251587, "learning_rate": 0.00010620727836089359, "loss": 1.6724, "step": 13845 }, { "epoch": 0.49585474600247104, "grad_norm": 1.6607482433319092, "learning_rate": 0.00010619570167346987, "loss": 1.3214, "step": 13846 }, { "epoch": 0.4958905581320393, "grad_norm": 1.4918594360351562, "learning_rate": 0.00010618412490269096, "loss": 1.4561, "step": 13847 }, { "epoch": 0.49592637026160763, "grad_norm": 1.8576520681381226, "learning_rate": 0.00010617254804871264, "loss": 1.4214, "step": 13848 }, { "epoch": 0.4959621823911759, "grad_norm": 1.425275444984436, "learning_rate": 0.00010616097111169063, "loss": 1.4187, "step": 13849 }, { "epoch": 0.49599799452074417, "grad_norm": 1.8478344678878784, "learning_rate": 0.00010614939409178072, "loss": 1.4093, "step": 13850 }, { "epoch": 0.49603380665031244, "grad_norm": 2.1850032806396484, "learning_rate": 0.00010613781698913863, "loss": 1.2745, "step": 13851 }, { "epoch": 0.49606961877988076, "grad_norm": 1.5047380924224854, "learning_rate": 0.00010612623980392016, "loss": 1.1031, "step": 13852 }, { "epoch": 0.49610543090944903, "grad_norm": 1.6822580099105835, "learning_rate": 0.00010611466253628101, "loss": 1.4585, "step": 13853 }, { "epoch": 0.4961412430390173, "grad_norm": 1.5494749546051025, "learning_rate": 0.00010610308518637697, "loss": 1.3333, "step": 13854 }, { "epoch": 0.4961770551685856, "grad_norm": 1.5541067123413086, "learning_rate": 0.00010609150775436378, "loss": 1.5149, "step": 13855 }, { "epoch": 0.4962128672981539, "grad_norm": 1.8944567441940308, "learning_rate": 0.00010607993024039722, "loss": 1.2425, "step": 13856 }, { "epoch": 0.49624867942772216, "grad_norm": 1.1840757131576538, "learning_rate": 0.00010606835264463305, "loss": 1.4237, "step": 13857 }, { "epoch": 0.49628449155729043, "grad_norm": 2.1415929794311523, "learning_rate": 0.00010605677496722699, "loss": 1.7492, "step": 13858 }, { "epoch": 0.49632030368685875, "grad_norm": 1.6626441478729248, "learning_rate": 0.00010604519720833486, "loss": 1.6241, "step": 13859 }, { "epoch": 0.496356115816427, "grad_norm": 1.551418423652649, "learning_rate": 0.00010603361936811239, "loss": 1.5366, "step": 13860 }, { "epoch": 0.4963919279459953, "grad_norm": 2.124976396560669, "learning_rate": 0.00010602204144671539, "loss": 1.6503, "step": 13861 }, { "epoch": 0.4964277400755636, "grad_norm": 2.3942666053771973, "learning_rate": 0.00010601046344429955, "loss": 1.5967, "step": 13862 }, { "epoch": 0.4964635522051319, "grad_norm": 2.203838586807251, "learning_rate": 0.0001059988853610207, "loss": 1.3865, "step": 13863 }, { "epoch": 0.49649936433470016, "grad_norm": 1.3743383884429932, "learning_rate": 0.00010598730719703456, "loss": 1.78, "step": 13864 }, { "epoch": 0.4965351764642684, "grad_norm": 1.5764050483703613, "learning_rate": 0.00010597572895249694, "loss": 1.1433, "step": 13865 }, { "epoch": 0.49657098859383675, "grad_norm": 2.720665454864502, "learning_rate": 0.00010596415062756358, "loss": 1.4423, "step": 13866 }, { "epoch": 0.496606800723405, "grad_norm": 1.9178234338760376, "learning_rate": 0.0001059525722223903, "loss": 1.5331, "step": 13867 }, { "epoch": 0.4966426128529733, "grad_norm": 1.4973068237304688, "learning_rate": 0.0001059409937371328, "loss": 1.5225, "step": 13868 }, { "epoch": 0.4966784249825416, "grad_norm": 1.7021870613098145, "learning_rate": 0.00010592941517194692, "loss": 1.3905, "step": 13869 }, { "epoch": 0.4967142371121099, "grad_norm": 1.600563645362854, "learning_rate": 0.00010591783652698841, "loss": 1.3413, "step": 13870 }, { "epoch": 0.49675004924167815, "grad_norm": 1.8694027662277222, "learning_rate": 0.00010590625780241302, "loss": 1.4979, "step": 13871 }, { "epoch": 0.4967858613712464, "grad_norm": 1.3049858808517456, "learning_rate": 0.00010589467899837657, "loss": 1.2021, "step": 13872 }, { "epoch": 0.49682167350081474, "grad_norm": 1.7500213384628296, "learning_rate": 0.0001058831001150348, "loss": 1.4798, "step": 13873 }, { "epoch": 0.496857485630383, "grad_norm": 1.521600365638733, "learning_rate": 0.00010587152115254353, "loss": 1.5667, "step": 13874 }, { "epoch": 0.4968932977599513, "grad_norm": 1.659149408340454, "learning_rate": 0.0001058599421110585, "loss": 1.3085, "step": 13875 }, { "epoch": 0.4969291098895196, "grad_norm": 1.9441237449645996, "learning_rate": 0.0001058483629907355, "loss": 1.5502, "step": 13876 }, { "epoch": 0.4969649220190879, "grad_norm": 1.8426682949066162, "learning_rate": 0.00010583678379173032, "loss": 1.7238, "step": 13877 }, { "epoch": 0.49700073414865614, "grad_norm": 2.1774513721466064, "learning_rate": 0.00010582520451419877, "loss": 1.4809, "step": 13878 }, { "epoch": 0.4970365462782244, "grad_norm": 1.3162360191345215, "learning_rate": 0.0001058136251582966, "loss": 1.573, "step": 13879 }, { "epoch": 0.49707235840779274, "grad_norm": 1.2653931379318237, "learning_rate": 0.00010580204572417957, "loss": 1.6874, "step": 13880 }, { "epoch": 0.497108170537361, "grad_norm": 2.2073793411254883, "learning_rate": 0.00010579046621200355, "loss": 1.8626, "step": 13881 }, { "epoch": 0.4971439826669293, "grad_norm": 1.9331858158111572, "learning_rate": 0.00010577888662192424, "loss": 1.6169, "step": 13882 }, { "epoch": 0.4971797947964976, "grad_norm": 1.5831600427627563, "learning_rate": 0.00010576730695409747, "loss": 1.5547, "step": 13883 }, { "epoch": 0.49721560692606587, "grad_norm": 1.8316388130187988, "learning_rate": 0.00010575572720867901, "loss": 1.7315, "step": 13884 }, { "epoch": 0.49725141905563414, "grad_norm": 1.7903798818588257, "learning_rate": 0.0001057441473858247, "loss": 1.4935, "step": 13885 }, { "epoch": 0.4972872311852024, "grad_norm": 1.3750848770141602, "learning_rate": 0.00010573256748569027, "loss": 1.3742, "step": 13886 }, { "epoch": 0.49732304331477073, "grad_norm": 2.335555076599121, "learning_rate": 0.00010572098750843155, "loss": 1.5615, "step": 13887 }, { "epoch": 0.497358855444339, "grad_norm": 1.5992259979248047, "learning_rate": 0.00010570940745420433, "loss": 1.6146, "step": 13888 }, { "epoch": 0.49739466757390727, "grad_norm": 1.6702439785003662, "learning_rate": 0.00010569782732316438, "loss": 1.443, "step": 13889 }, { "epoch": 0.4974304797034756, "grad_norm": 1.9406352043151855, "learning_rate": 0.00010568624711546752, "loss": 1.4274, "step": 13890 }, { "epoch": 0.49746629183304386, "grad_norm": 1.4451533555984497, "learning_rate": 0.00010567466683126952, "loss": 1.5557, "step": 13891 }, { "epoch": 0.49750210396261213, "grad_norm": 2.7517757415771484, "learning_rate": 0.00010566308647072624, "loss": 1.5878, "step": 13892 }, { "epoch": 0.4975379160921804, "grad_norm": 1.9673051834106445, "learning_rate": 0.0001056515060339934, "loss": 1.7578, "step": 13893 }, { "epoch": 0.4975737282217487, "grad_norm": 1.653322458267212, "learning_rate": 0.00010563992552122686, "loss": 1.2221, "step": 13894 }, { "epoch": 0.497609540351317, "grad_norm": 1.8169872760772705, "learning_rate": 0.00010562834493258237, "loss": 1.4192, "step": 13895 }, { "epoch": 0.49764535248088526, "grad_norm": 1.7515963315963745, "learning_rate": 0.00010561676426821581, "loss": 1.4524, "step": 13896 }, { "epoch": 0.4976811646104536, "grad_norm": 1.6058905124664307, "learning_rate": 0.00010560518352828288, "loss": 1.5834, "step": 13897 }, { "epoch": 0.49771697674002185, "grad_norm": 2.050147294998169, "learning_rate": 0.00010559360271293947, "loss": 1.3106, "step": 13898 }, { "epoch": 0.4977527888695901, "grad_norm": 1.8379297256469727, "learning_rate": 0.00010558202182234132, "loss": 1.6386, "step": 13899 }, { "epoch": 0.4977886009991584, "grad_norm": 1.9180848598480225, "learning_rate": 0.00010557044085664428, "loss": 1.4842, "step": 13900 }, { "epoch": 0.4978244131287267, "grad_norm": 1.9606422185897827, "learning_rate": 0.00010555885981600416, "loss": 1.225, "step": 13901 }, { "epoch": 0.497860225258295, "grad_norm": 1.8793365955352783, "learning_rate": 0.00010554727870057671, "loss": 1.6759, "step": 13902 }, { "epoch": 0.49789603738786326, "grad_norm": 2.0016098022460938, "learning_rate": 0.00010553569751051782, "loss": 1.4724, "step": 13903 }, { "epoch": 0.4979318495174316, "grad_norm": 2.300502061843872, "learning_rate": 0.00010552411624598325, "loss": 1.3582, "step": 13904 }, { "epoch": 0.49796766164699985, "grad_norm": 2.5776402950286865, "learning_rate": 0.00010551253490712882, "loss": 1.529, "step": 13905 }, { "epoch": 0.4980034737765681, "grad_norm": 1.8135322332382202, "learning_rate": 0.00010550095349411033, "loss": 1.3956, "step": 13906 }, { "epoch": 0.4980392859061364, "grad_norm": 1.6198707818984985, "learning_rate": 0.00010548937200708365, "loss": 1.348, "step": 13907 }, { "epoch": 0.4980750980357047, "grad_norm": 1.9541133642196655, "learning_rate": 0.0001054777904462045, "loss": 1.4611, "step": 13908 }, { "epoch": 0.498110910165273, "grad_norm": 1.7286028861999512, "learning_rate": 0.00010546620881162876, "loss": 1.1831, "step": 13909 }, { "epoch": 0.49814672229484125, "grad_norm": 1.5478944778442383, "learning_rate": 0.00010545462710351224, "loss": 1.5389, "step": 13910 }, { "epoch": 0.4981825344244096, "grad_norm": 1.9012272357940674, "learning_rate": 0.00010544304532201075, "loss": 1.2058, "step": 13911 }, { "epoch": 0.49821834655397784, "grad_norm": 1.586827039718628, "learning_rate": 0.0001054314634672801, "loss": 1.8115, "step": 13912 }, { "epoch": 0.4982541586835461, "grad_norm": 1.4416097402572632, "learning_rate": 0.00010541988153947609, "loss": 1.4744, "step": 13913 }, { "epoch": 0.4982899708131144, "grad_norm": 1.427348256111145, "learning_rate": 0.00010540829953875462, "loss": 1.3261, "step": 13914 }, { "epoch": 0.4983257829426827, "grad_norm": 1.6142998933792114, "learning_rate": 0.00010539671746527142, "loss": 1.4967, "step": 13915 }, { "epoch": 0.498361595072251, "grad_norm": 1.6555075645446777, "learning_rate": 0.00010538513531918237, "loss": 1.2144, "step": 13916 }, { "epoch": 0.49839740720181924, "grad_norm": 1.8272299766540527, "learning_rate": 0.00010537355310064323, "loss": 1.3989, "step": 13917 }, { "epoch": 0.49843321933138757, "grad_norm": 1.2751270532608032, "learning_rate": 0.00010536197080980991, "loss": 1.6073, "step": 13918 }, { "epoch": 0.49846903146095584, "grad_norm": 1.965511679649353, "learning_rate": 0.00010535038844683816, "loss": 1.6124, "step": 13919 }, { "epoch": 0.4985048435905241, "grad_norm": 1.3621838092803955, "learning_rate": 0.00010533880601188384, "loss": 1.436, "step": 13920 }, { "epoch": 0.4985406557200924, "grad_norm": 2.440661668777466, "learning_rate": 0.00010532722350510277, "loss": 1.3973, "step": 13921 }, { "epoch": 0.4985764678496607, "grad_norm": 1.5295976400375366, "learning_rate": 0.00010531564092665079, "loss": 1.5994, "step": 13922 }, { "epoch": 0.49861227997922897, "grad_norm": 2.2189676761627197, "learning_rate": 0.00010530405827668372, "loss": 1.3265, "step": 13923 }, { "epoch": 0.49864809210879724, "grad_norm": 1.4622187614440918, "learning_rate": 0.00010529247555535738, "loss": 1.7136, "step": 13924 }, { "epoch": 0.49868390423836556, "grad_norm": 1.473183274269104, "learning_rate": 0.00010528089276282762, "loss": 1.6749, "step": 13925 }, { "epoch": 0.49871971636793383, "grad_norm": 1.5246021747589111, "learning_rate": 0.00010526930989925023, "loss": 1.5913, "step": 13926 }, { "epoch": 0.4987555284975021, "grad_norm": 1.6022064685821533, "learning_rate": 0.0001052577269647811, "loss": 1.4599, "step": 13927 }, { "epoch": 0.49879134062707037, "grad_norm": 1.3866850137710571, "learning_rate": 0.00010524614395957602, "loss": 1.3702, "step": 13928 }, { "epoch": 0.4988271527566387, "grad_norm": 2.0144548416137695, "learning_rate": 0.00010523456088379084, "loss": 1.582, "step": 13929 }, { "epoch": 0.49886296488620696, "grad_norm": 2.1504111289978027, "learning_rate": 0.00010522297773758141, "loss": 1.6746, "step": 13930 }, { "epoch": 0.49889877701577523, "grad_norm": 1.4238390922546387, "learning_rate": 0.00010521139452110354, "loss": 1.6415, "step": 13931 }, { "epoch": 0.49893458914534355, "grad_norm": 2.0092551708221436, "learning_rate": 0.0001051998112345131, "loss": 1.1434, "step": 13932 }, { "epoch": 0.4989704012749118, "grad_norm": 1.7443785667419434, "learning_rate": 0.00010518822787796587, "loss": 1.6474, "step": 13933 }, { "epoch": 0.4990062134044801, "grad_norm": 1.9198230504989624, "learning_rate": 0.00010517664445161775, "loss": 1.291, "step": 13934 }, { "epoch": 0.49904202553404836, "grad_norm": 2.1047768592834473, "learning_rate": 0.00010516506095562455, "loss": 1.4204, "step": 13935 }, { "epoch": 0.4990778376636167, "grad_norm": 1.772157907485962, "learning_rate": 0.00010515347739014212, "loss": 1.4815, "step": 13936 }, { "epoch": 0.49911364979318495, "grad_norm": 1.4584022760391235, "learning_rate": 0.00010514189375532629, "loss": 1.1986, "step": 13937 }, { "epoch": 0.4991494619227532, "grad_norm": 1.6682506799697876, "learning_rate": 0.00010513031005133293, "loss": 1.1585, "step": 13938 }, { "epoch": 0.49918527405232155, "grad_norm": 2.0032548904418945, "learning_rate": 0.00010511872627831785, "loss": 1.8689, "step": 13939 }, { "epoch": 0.4992210861818898, "grad_norm": 1.799172043800354, "learning_rate": 0.00010510714243643693, "loss": 1.2141, "step": 13940 }, { "epoch": 0.4992568983114581, "grad_norm": 1.4442138671875, "learning_rate": 0.00010509555852584598, "loss": 1.3636, "step": 13941 }, { "epoch": 0.49929271044102636, "grad_norm": 1.4175442457199097, "learning_rate": 0.00010508397454670085, "loss": 1.4478, "step": 13942 }, { "epoch": 0.4993285225705947, "grad_norm": 2.0027573108673096, "learning_rate": 0.00010507239049915742, "loss": 1.8723, "step": 13943 }, { "epoch": 0.49936433470016295, "grad_norm": 1.363532543182373, "learning_rate": 0.00010506080638337152, "loss": 1.5395, "step": 13944 }, { "epoch": 0.4994001468297312, "grad_norm": 2.1520915031433105, "learning_rate": 0.000105049222199499, "loss": 1.5888, "step": 13945 }, { "epoch": 0.49943595895929954, "grad_norm": 1.5983831882476807, "learning_rate": 0.0001050376379476957, "loss": 1.2995, "step": 13946 }, { "epoch": 0.4994717710888678, "grad_norm": 1.9708709716796875, "learning_rate": 0.00010502605362811748, "loss": 1.3502, "step": 13947 }, { "epoch": 0.4995075832184361, "grad_norm": 1.6863350868225098, "learning_rate": 0.00010501446924092018, "loss": 1.3486, "step": 13948 }, { "epoch": 0.49954339534800435, "grad_norm": 1.7589701414108276, "learning_rate": 0.0001050028847862597, "loss": 1.4596, "step": 13949 }, { "epoch": 0.4995792074775727, "grad_norm": 1.4610109329223633, "learning_rate": 0.00010499130026429182, "loss": 1.5116, "step": 13950 }, { "epoch": 0.49961501960714094, "grad_norm": 1.776166558265686, "learning_rate": 0.00010497971567517246, "loss": 1.3748, "step": 13951 }, { "epoch": 0.4996508317367092, "grad_norm": 2.0865702629089355, "learning_rate": 0.00010496813101905745, "loss": 1.4648, "step": 13952 }, { "epoch": 0.49968664386627754, "grad_norm": 1.7952061891555786, "learning_rate": 0.00010495654629610264, "loss": 1.233, "step": 13953 }, { "epoch": 0.4997224559958458, "grad_norm": 2.238560914993286, "learning_rate": 0.00010494496150646387, "loss": 1.596, "step": 13954 }, { "epoch": 0.4997582681254141, "grad_norm": 1.680570125579834, "learning_rate": 0.000104933376650297, "loss": 1.4756, "step": 13955 }, { "epoch": 0.49979408025498234, "grad_norm": 2.210883140563965, "learning_rate": 0.00010492179172775797, "loss": 1.3497, "step": 13956 }, { "epoch": 0.49982989238455067, "grad_norm": 1.6548429727554321, "learning_rate": 0.00010491020673900256, "loss": 1.4613, "step": 13957 }, { "epoch": 0.49986570451411894, "grad_norm": 1.5173407793045044, "learning_rate": 0.00010489862168418667, "loss": 1.237, "step": 13958 }, { "epoch": 0.4999015166436872, "grad_norm": 1.6748205423355103, "learning_rate": 0.00010488703656346612, "loss": 1.6796, "step": 13959 }, { "epoch": 0.49993732877325553, "grad_norm": 2.2903249263763428, "learning_rate": 0.00010487545137699682, "loss": 1.3784, "step": 13960 }, { "epoch": 0.4999731409028238, "grad_norm": 1.832918643951416, "learning_rate": 0.00010486386612493458, "loss": 1.4155, "step": 13961 }, { "epoch": 0.5000089530323921, "grad_norm": 1.8992984294891357, "learning_rate": 0.00010485228080743532, "loss": 1.3437, "step": 13962 }, { "epoch": 0.5000447651619604, "grad_norm": 1.5817441940307617, "learning_rate": 0.00010484069542465484, "loss": 1.2825, "step": 13963 }, { "epoch": 0.5000805772915287, "grad_norm": 1.7901121377944946, "learning_rate": 0.00010482910997674911, "loss": 1.4525, "step": 13964 }, { "epoch": 0.5001163894210969, "grad_norm": 1.9378963708877563, "learning_rate": 0.00010481752446387387, "loss": 1.5827, "step": 13965 }, { "epoch": 0.5001522015506652, "grad_norm": 1.3163588047027588, "learning_rate": 0.0001048059388861851, "loss": 1.3767, "step": 13966 }, { "epoch": 0.5001880136802335, "grad_norm": 2.0047686100006104, "learning_rate": 0.00010479435324383861, "loss": 1.4476, "step": 13967 }, { "epoch": 0.5002238258098017, "grad_norm": 1.7671470642089844, "learning_rate": 0.00010478276753699028, "loss": 1.523, "step": 13968 }, { "epoch": 0.5002596379393701, "grad_norm": 1.4821014404296875, "learning_rate": 0.00010477118176579597, "loss": 1.4437, "step": 13969 }, { "epoch": 0.5002954500689384, "grad_norm": 2.142829656600952, "learning_rate": 0.00010475959593041156, "loss": 1.5739, "step": 13970 }, { "epoch": 0.5003312621985067, "grad_norm": 2.060955047607422, "learning_rate": 0.00010474801003099294, "loss": 1.5802, "step": 13971 }, { "epoch": 0.5003670743280749, "grad_norm": 1.8736474514007568, "learning_rate": 0.00010473642406769597, "loss": 1.2844, "step": 13972 }, { "epoch": 0.5004028864576432, "grad_norm": 1.4875705242156982, "learning_rate": 0.00010472483804067652, "loss": 1.0674, "step": 13973 }, { "epoch": 0.5004386985872115, "grad_norm": 1.404869556427002, "learning_rate": 0.00010471325195009047, "loss": 1.6174, "step": 13974 }, { "epoch": 0.5004745107167797, "grad_norm": 1.461624264717102, "learning_rate": 0.00010470166579609371, "loss": 1.5879, "step": 13975 }, { "epoch": 0.5005103228463481, "grad_norm": 1.3409490585327148, "learning_rate": 0.0001046900795788421, "loss": 1.4595, "step": 13976 }, { "epoch": 0.5005461349759164, "grad_norm": 1.7915490865707397, "learning_rate": 0.00010467849329849148, "loss": 1.5817, "step": 13977 }, { "epoch": 0.5005819471054846, "grad_norm": 1.4532082080841064, "learning_rate": 0.00010466690695519781, "loss": 1.4423, "step": 13978 }, { "epoch": 0.5006177592350529, "grad_norm": 1.5998505353927612, "learning_rate": 0.00010465532054911689, "loss": 1.564, "step": 13979 }, { "epoch": 0.5006535713646212, "grad_norm": 1.4868236780166626, "learning_rate": 0.00010464373408040467, "loss": 1.6871, "step": 13980 }, { "epoch": 0.5006893834941895, "grad_norm": 1.7556103467941284, "learning_rate": 0.00010463214754921697, "loss": 1.4825, "step": 13981 }, { "epoch": 0.5007251956237577, "grad_norm": 2.2433080673217773, "learning_rate": 0.00010462056095570974, "loss": 1.5892, "step": 13982 }, { "epoch": 0.5007610077533261, "grad_norm": 1.4293811321258545, "learning_rate": 0.00010460897430003877, "loss": 1.4694, "step": 13983 }, { "epoch": 0.5007968198828944, "grad_norm": 2.3637685775756836, "learning_rate": 0.00010459738758236006, "loss": 2.1556, "step": 13984 }, { "epoch": 0.5008326320124626, "grad_norm": 1.4137765169143677, "learning_rate": 0.00010458580080282938, "loss": 1.5547, "step": 13985 }, { "epoch": 0.5008684441420309, "grad_norm": 1.7568750381469727, "learning_rate": 0.00010457421396160265, "loss": 1.4713, "step": 13986 }, { "epoch": 0.5009042562715992, "grad_norm": 1.891427755355835, "learning_rate": 0.00010456262705883581, "loss": 1.0563, "step": 13987 }, { "epoch": 0.5009400684011674, "grad_norm": 1.6349992752075195, "learning_rate": 0.0001045510400946847, "loss": 1.4901, "step": 13988 }, { "epoch": 0.5009758805307357, "grad_norm": 1.4608160257339478, "learning_rate": 0.00010453945306930521, "loss": 1.507, "step": 13989 }, { "epoch": 0.5010116926603041, "grad_norm": 1.6028715372085571, "learning_rate": 0.00010452786598285323, "loss": 1.6952, "step": 13990 }, { "epoch": 0.5010475047898724, "grad_norm": 1.4232906103134155, "learning_rate": 0.00010451627883548468, "loss": 1.4949, "step": 13991 }, { "epoch": 0.5010833169194406, "grad_norm": 1.2715191841125488, "learning_rate": 0.00010450469162735539, "loss": 1.3847, "step": 13992 }, { "epoch": 0.5011191290490089, "grad_norm": 1.617362380027771, "learning_rate": 0.00010449310435862134, "loss": 1.508, "step": 13993 }, { "epoch": 0.5011549411785772, "grad_norm": 1.5455505847930908, "learning_rate": 0.00010448151702943831, "loss": 1.557, "step": 13994 }, { "epoch": 0.5011907533081454, "grad_norm": 1.6332513093948364, "learning_rate": 0.00010446992963996227, "loss": 1.5056, "step": 13995 }, { "epoch": 0.5012265654377137, "grad_norm": 1.7719979286193848, "learning_rate": 0.00010445834219034909, "loss": 1.6498, "step": 13996 }, { "epoch": 0.5012623775672821, "grad_norm": 1.3904492855072021, "learning_rate": 0.00010444675468075467, "loss": 1.4026, "step": 13997 }, { "epoch": 0.5012981896968504, "grad_norm": 1.3235598802566528, "learning_rate": 0.00010443516711133487, "loss": 1.4448, "step": 13998 }, { "epoch": 0.5013340018264186, "grad_norm": 1.7896219491958618, "learning_rate": 0.00010442357948224564, "loss": 1.6782, "step": 13999 }, { "epoch": 0.5013698139559869, "grad_norm": 1.3523547649383545, "learning_rate": 0.00010441199179364287, "loss": 1.0761, "step": 14000 }, { "epoch": 0.5014056260855552, "grad_norm": 1.9005093574523926, "learning_rate": 0.00010440040404568241, "loss": 1.5543, "step": 14001 }, { "epoch": 0.5014414382151234, "grad_norm": 1.3568506240844727, "learning_rate": 0.00010438881623852026, "loss": 1.6062, "step": 14002 }, { "epoch": 0.5014772503446917, "grad_norm": 1.6488691568374634, "learning_rate": 0.00010437722837231218, "loss": 1.7009, "step": 14003 }, { "epoch": 0.5015130624742601, "grad_norm": 1.5517204999923706, "learning_rate": 0.00010436564044721415, "loss": 1.7509, "step": 14004 }, { "epoch": 0.5015488746038284, "grad_norm": 1.2834587097167969, "learning_rate": 0.00010435405246338205, "loss": 1.4208, "step": 14005 }, { "epoch": 0.5015846867333966, "grad_norm": 1.3293063640594482, "learning_rate": 0.00010434246442097184, "loss": 1.4019, "step": 14006 }, { "epoch": 0.5016204988629649, "grad_norm": 2.0238394737243652, "learning_rate": 0.00010433087632013931, "loss": 1.3617, "step": 14007 }, { "epoch": 0.5016563109925332, "grad_norm": 1.7734540700912476, "learning_rate": 0.00010431928816104048, "loss": 1.3808, "step": 14008 }, { "epoch": 0.5016921231221014, "grad_norm": 1.458070158958435, "learning_rate": 0.00010430769994383116, "loss": 1.4744, "step": 14009 }, { "epoch": 0.5017279352516697, "grad_norm": 1.3446053266525269, "learning_rate": 0.0001042961116686673, "loss": 1.4234, "step": 14010 }, { "epoch": 0.5017637473812381, "grad_norm": 2.673496723175049, "learning_rate": 0.00010428452333570482, "loss": 1.9296, "step": 14011 }, { "epoch": 0.5017995595108063, "grad_norm": 2.1430649757385254, "learning_rate": 0.0001042729349450996, "loss": 1.4881, "step": 14012 }, { "epoch": 0.5018353716403746, "grad_norm": 1.6296019554138184, "learning_rate": 0.00010426134649700754, "loss": 1.1715, "step": 14013 }, { "epoch": 0.5018711837699429, "grad_norm": 1.853363275527954, "learning_rate": 0.00010424975799158456, "loss": 1.3376, "step": 14014 }, { "epoch": 0.5019069958995112, "grad_norm": 1.6113402843475342, "learning_rate": 0.00010423816942898659, "loss": 1.6665, "step": 14015 }, { "epoch": 0.5019428080290794, "grad_norm": 1.607465147972107, "learning_rate": 0.00010422658080936947, "loss": 1.5115, "step": 14016 }, { "epoch": 0.5019786201586477, "grad_norm": 1.796221375465393, "learning_rate": 0.00010421499213288919, "loss": 1.5607, "step": 14017 }, { "epoch": 0.5020144322882161, "grad_norm": 2.2916972637176514, "learning_rate": 0.00010420340339970163, "loss": 1.6604, "step": 14018 }, { "epoch": 0.5020502444177843, "grad_norm": 1.6994593143463135, "learning_rate": 0.0001041918146099627, "loss": 1.5249, "step": 14019 }, { "epoch": 0.5020860565473526, "grad_norm": 1.3633710145950317, "learning_rate": 0.00010418022576382831, "loss": 1.2622, "step": 14020 }, { "epoch": 0.5021218686769209, "grad_norm": 2.406529426574707, "learning_rate": 0.00010416863686145434, "loss": 1.3539, "step": 14021 }, { "epoch": 0.5021576808064891, "grad_norm": 1.4602991342544556, "learning_rate": 0.00010415704790299678, "loss": 1.3125, "step": 14022 }, { "epoch": 0.5021934929360574, "grad_norm": 1.564888596534729, "learning_rate": 0.00010414545888861149, "loss": 1.4893, "step": 14023 }, { "epoch": 0.5022293050656257, "grad_norm": 1.9032400846481323, "learning_rate": 0.0001041338698184544, "loss": 1.425, "step": 14024 }, { "epoch": 0.5022651171951941, "grad_norm": 2.332868814468384, "learning_rate": 0.00010412228069268142, "loss": 1.4777, "step": 14025 }, { "epoch": 0.5023009293247623, "grad_norm": 1.6929960250854492, "learning_rate": 0.00010411069151144848, "loss": 1.729, "step": 14026 }, { "epoch": 0.5023367414543306, "grad_norm": 2.2395436763763428, "learning_rate": 0.00010409910227491146, "loss": 1.6846, "step": 14027 }, { "epoch": 0.5023725535838989, "grad_norm": 1.671336054801941, "learning_rate": 0.00010408751298322634, "loss": 1.2541, "step": 14028 }, { "epoch": 0.5024083657134671, "grad_norm": 1.7518086433410645, "learning_rate": 0.00010407592363654901, "loss": 1.3721, "step": 14029 }, { "epoch": 0.5024441778430354, "grad_norm": 2.0253491401672363, "learning_rate": 0.00010406433423503534, "loss": 1.5186, "step": 14030 }, { "epoch": 0.5024799899726037, "grad_norm": 1.4864667654037476, "learning_rate": 0.00010405274477884135, "loss": 1.1936, "step": 14031 }, { "epoch": 0.5025158021021721, "grad_norm": 1.1731634140014648, "learning_rate": 0.00010404115526812286, "loss": 1.4125, "step": 14032 }, { "epoch": 0.5025516142317403, "grad_norm": 1.3496315479278564, "learning_rate": 0.00010402956570303586, "loss": 1.3462, "step": 14033 }, { "epoch": 0.5025874263613086, "grad_norm": 1.6473044157028198, "learning_rate": 0.00010401797608373625, "loss": 1.7143, "step": 14034 }, { "epoch": 0.5026232384908769, "grad_norm": 2.9563591480255127, "learning_rate": 0.00010400638641037996, "loss": 1.505, "step": 14035 }, { "epoch": 0.5026590506204451, "grad_norm": 1.4683979749679565, "learning_rate": 0.00010399479668312288, "loss": 1.5082, "step": 14036 }, { "epoch": 0.5026948627500134, "grad_norm": 1.4357550144195557, "learning_rate": 0.00010398320690212102, "loss": 1.4316, "step": 14037 }, { "epoch": 0.5027306748795817, "grad_norm": 1.8764081001281738, "learning_rate": 0.00010397161706753021, "loss": 1.4092, "step": 14038 }, { "epoch": 0.50276648700915, "grad_norm": 1.5990819931030273, "learning_rate": 0.00010396002717950644, "loss": 1.4917, "step": 14039 }, { "epoch": 0.5028022991387183, "grad_norm": 1.571290373802185, "learning_rate": 0.00010394843723820558, "loss": 1.3678, "step": 14040 }, { "epoch": 0.5028381112682866, "grad_norm": 2.3405323028564453, "learning_rate": 0.00010393684724378358, "loss": 1.4077, "step": 14041 }, { "epoch": 0.5028739233978549, "grad_norm": 1.5465885400772095, "learning_rate": 0.00010392525719639642, "loss": 0.9557, "step": 14042 }, { "epoch": 0.5029097355274231, "grad_norm": 1.379290223121643, "learning_rate": 0.00010391366709619994, "loss": 1.3364, "step": 14043 }, { "epoch": 0.5029455476569914, "grad_norm": 1.6196434497833252, "learning_rate": 0.00010390207694335017, "loss": 1.4637, "step": 14044 }, { "epoch": 0.5029813597865597, "grad_norm": 1.7534099817276, "learning_rate": 0.00010389048673800294, "loss": 1.4994, "step": 14045 }, { "epoch": 0.503017171916128, "grad_norm": 2.0215682983398438, "learning_rate": 0.00010387889648031428, "loss": 1.5993, "step": 14046 }, { "epoch": 0.5030529840456963, "grad_norm": 1.6501681804656982, "learning_rate": 0.00010386730617044005, "loss": 1.3859, "step": 14047 }, { "epoch": 0.5030887961752646, "grad_norm": 1.9652694463729858, "learning_rate": 0.0001038557158085362, "loss": 1.6148, "step": 14048 }, { "epoch": 0.5031246083048329, "grad_norm": 1.6333247423171997, "learning_rate": 0.00010384412539475865, "loss": 1.1812, "step": 14049 }, { "epoch": 0.5031604204344011, "grad_norm": 1.772522211074829, "learning_rate": 0.00010383253492926339, "loss": 1.6932, "step": 14050 }, { "epoch": 0.5031962325639694, "grad_norm": 1.6307519674301147, "learning_rate": 0.00010382094441220627, "loss": 1.4595, "step": 14051 }, { "epoch": 0.5032320446935377, "grad_norm": 1.7731724977493286, "learning_rate": 0.00010380935384374331, "loss": 1.5997, "step": 14052 }, { "epoch": 0.503267856823106, "grad_norm": 1.5439388751983643, "learning_rate": 0.00010379776322403039, "loss": 1.2718, "step": 14053 }, { "epoch": 0.5033036689526743, "grad_norm": 2.08516001701355, "learning_rate": 0.00010378617255322344, "loss": 1.1215, "step": 14054 }, { "epoch": 0.5033394810822426, "grad_norm": 1.9739441871643066, "learning_rate": 0.00010377458183147848, "loss": 1.5623, "step": 14055 }, { "epoch": 0.5033752932118108, "grad_norm": 1.473125696182251, "learning_rate": 0.00010376299105895135, "loss": 1.4087, "step": 14056 }, { "epoch": 0.5034111053413791, "grad_norm": 1.925338625907898, "learning_rate": 0.00010375140023579805, "loss": 1.5258, "step": 14057 }, { "epoch": 0.5034469174709474, "grad_norm": 1.9338470697402954, "learning_rate": 0.0001037398093621745, "loss": 1.7284, "step": 14058 }, { "epoch": 0.5034827296005157, "grad_norm": 1.9991499185562134, "learning_rate": 0.00010372821843823661, "loss": 1.5905, "step": 14059 }, { "epoch": 0.503518541730084, "grad_norm": 1.4696353673934937, "learning_rate": 0.00010371662746414037, "loss": 1.4171, "step": 14060 }, { "epoch": 0.5035543538596523, "grad_norm": 1.5892161130905151, "learning_rate": 0.00010370503644004171, "loss": 1.5635, "step": 14061 }, { "epoch": 0.5035901659892206, "grad_norm": 1.718597650527954, "learning_rate": 0.00010369344536609653, "loss": 1.7008, "step": 14062 }, { "epoch": 0.5036259781187888, "grad_norm": 1.6046531200408936, "learning_rate": 0.00010368185424246084, "loss": 1.7398, "step": 14063 }, { "epoch": 0.5036617902483571, "grad_norm": 1.702903389930725, "learning_rate": 0.00010367026306929056, "loss": 1.6981, "step": 14064 }, { "epoch": 0.5036976023779254, "grad_norm": 2.5990512371063232, "learning_rate": 0.00010365867184674159, "loss": 1.39, "step": 14065 }, { "epoch": 0.5037334145074936, "grad_norm": 1.5704472064971924, "learning_rate": 0.00010364708057496992, "loss": 1.6272, "step": 14066 }, { "epoch": 0.5037692266370619, "grad_norm": 1.577022671699524, "learning_rate": 0.00010363548925413149, "loss": 1.2842, "step": 14067 }, { "epoch": 0.5038050387666303, "grad_norm": 2.005908250808716, "learning_rate": 0.00010362389788438225, "loss": 1.2809, "step": 14068 }, { "epoch": 0.5038408508961986, "grad_norm": 2.519007444381714, "learning_rate": 0.00010361230646587812, "loss": 1.509, "step": 14069 }, { "epoch": 0.5038766630257668, "grad_norm": 2.2054390907287598, "learning_rate": 0.00010360071499877508, "loss": 1.4182, "step": 14070 }, { "epoch": 0.5039124751553351, "grad_norm": 1.6459708213806152, "learning_rate": 0.00010358912348322904, "loss": 1.1879, "step": 14071 }, { "epoch": 0.5039482872849034, "grad_norm": 2.3594183921813965, "learning_rate": 0.00010357753191939601, "loss": 1.7322, "step": 14072 }, { "epoch": 0.5039840994144716, "grad_norm": 1.8860596418380737, "learning_rate": 0.0001035659403074319, "loss": 1.8158, "step": 14073 }, { "epoch": 0.5040199115440399, "grad_norm": 1.5959560871124268, "learning_rate": 0.00010355434864749262, "loss": 1.337, "step": 14074 }, { "epoch": 0.5040557236736083, "grad_norm": 1.7443625926971436, "learning_rate": 0.0001035427569397342, "loss": 1.4158, "step": 14075 }, { "epoch": 0.5040915358031766, "grad_norm": 1.536818027496338, "learning_rate": 0.00010353116518431254, "loss": 1.351, "step": 14076 }, { "epoch": 0.5041273479327448, "grad_norm": 1.3087923526763916, "learning_rate": 0.00010351957338138363, "loss": 1.7281, "step": 14077 }, { "epoch": 0.5041631600623131, "grad_norm": 1.8406116962432861, "learning_rate": 0.00010350798153110337, "loss": 1.114, "step": 14078 }, { "epoch": 0.5041989721918814, "grad_norm": 1.7270954847335815, "learning_rate": 0.00010349638963362777, "loss": 1.6466, "step": 14079 }, { "epoch": 0.5042347843214496, "grad_norm": 2.479389190673828, "learning_rate": 0.00010348479768911272, "loss": 1.2647, "step": 14080 }, { "epoch": 0.5042705964510179, "grad_norm": 1.7443976402282715, "learning_rate": 0.00010347320569771428, "loss": 1.6703, "step": 14081 }, { "epoch": 0.5043064085805863, "grad_norm": 1.6824451684951782, "learning_rate": 0.00010346161365958829, "loss": 1.1959, "step": 14082 }, { "epoch": 0.5043422207101546, "grad_norm": 1.8849799633026123, "learning_rate": 0.00010345002157489074, "loss": 1.3789, "step": 14083 }, { "epoch": 0.5043780328397228, "grad_norm": 1.5173338651657104, "learning_rate": 0.00010343842944377764, "loss": 1.7663, "step": 14084 }, { "epoch": 0.5044138449692911, "grad_norm": 1.5596895217895508, "learning_rate": 0.00010342683726640487, "loss": 1.4884, "step": 14085 }, { "epoch": 0.5044496570988594, "grad_norm": 1.4701156616210938, "learning_rate": 0.00010341524504292845, "loss": 1.5526, "step": 14086 }, { "epoch": 0.5044854692284276, "grad_norm": 1.7516635656356812, "learning_rate": 0.00010340365277350428, "loss": 1.4059, "step": 14087 }, { "epoch": 0.5045212813579959, "grad_norm": 1.8188012838363647, "learning_rate": 0.0001033920604582884, "loss": 1.4001, "step": 14088 }, { "epoch": 0.5045570934875643, "grad_norm": 2.2817301750183105, "learning_rate": 0.00010338046809743668, "loss": 1.162, "step": 14089 }, { "epoch": 0.5045929056171325, "grad_norm": 1.7542790174484253, "learning_rate": 0.00010336887569110518, "loss": 1.505, "step": 14090 }, { "epoch": 0.5046287177467008, "grad_norm": 1.8242846727371216, "learning_rate": 0.00010335728323944974, "loss": 1.319, "step": 14091 }, { "epoch": 0.5046645298762691, "grad_norm": 1.5669910907745361, "learning_rate": 0.00010334569074262641, "loss": 1.4626, "step": 14092 }, { "epoch": 0.5047003420058374, "grad_norm": 1.8271517753601074, "learning_rate": 0.00010333409820079112, "loss": 1.4747, "step": 14093 }, { "epoch": 0.5047361541354056, "grad_norm": 1.7885736227035522, "learning_rate": 0.00010332250561409986, "loss": 1.4699, "step": 14094 }, { "epoch": 0.5047719662649739, "grad_norm": 1.3920366764068604, "learning_rate": 0.00010331091298270854, "loss": 1.4448, "step": 14095 }, { "epoch": 0.5048077783945423, "grad_norm": 2.2439980506896973, "learning_rate": 0.00010329932030677316, "loss": 1.6206, "step": 14096 }, { "epoch": 0.5048435905241105, "grad_norm": 1.532996654510498, "learning_rate": 0.00010328772758644971, "loss": 1.4726, "step": 14097 }, { "epoch": 0.5048794026536788, "grad_norm": 1.5202641487121582, "learning_rate": 0.00010327613482189409, "loss": 1.3134, "step": 14098 }, { "epoch": 0.5049152147832471, "grad_norm": 2.1792054176330566, "learning_rate": 0.00010326454201326236, "loss": 1.3475, "step": 14099 }, { "epoch": 0.5049510269128153, "grad_norm": 1.6390247344970703, "learning_rate": 0.00010325294916071038, "loss": 1.262, "step": 14100 }, { "epoch": 0.5049868390423836, "grad_norm": 1.5163406133651733, "learning_rate": 0.00010324135626439419, "loss": 1.4472, "step": 14101 }, { "epoch": 0.5050226511719519, "grad_norm": 1.6749697923660278, "learning_rate": 0.0001032297633244697, "loss": 1.4647, "step": 14102 }, { "epoch": 0.5050584633015203, "grad_norm": 1.6296559572219849, "learning_rate": 0.00010321817034109293, "loss": 1.1982, "step": 14103 }, { "epoch": 0.5050942754310885, "grad_norm": 1.5373493432998657, "learning_rate": 0.00010320657731441982, "loss": 1.4018, "step": 14104 }, { "epoch": 0.5051300875606568, "grad_norm": 1.7386698722839355, "learning_rate": 0.00010319498424460636, "loss": 1.302, "step": 14105 }, { "epoch": 0.5051658996902251, "grad_norm": 2.1452550888061523, "learning_rate": 0.0001031833911318085, "loss": 1.5013, "step": 14106 }, { "epoch": 0.5052017118197933, "grad_norm": 1.7856847047805786, "learning_rate": 0.00010317179797618223, "loss": 1.5817, "step": 14107 }, { "epoch": 0.5052375239493616, "grad_norm": 1.160574197769165, "learning_rate": 0.00010316020477788353, "loss": 1.6855, "step": 14108 }, { "epoch": 0.5052733360789299, "grad_norm": 1.5507274866104126, "learning_rate": 0.0001031486115370683, "loss": 1.6322, "step": 14109 }, { "epoch": 0.5053091482084983, "grad_norm": 1.567002296447754, "learning_rate": 0.00010313701825389259, "loss": 1.4388, "step": 14110 }, { "epoch": 0.5053449603380665, "grad_norm": 1.6256871223449707, "learning_rate": 0.00010312542492851234, "loss": 1.1, "step": 14111 }, { "epoch": 0.5053807724676348, "grad_norm": 1.9150153398513794, "learning_rate": 0.00010311383156108354, "loss": 1.8181, "step": 14112 }, { "epoch": 0.5054165845972031, "grad_norm": 1.7468947172164917, "learning_rate": 0.00010310223815176215, "loss": 1.3511, "step": 14113 }, { "epoch": 0.5054523967267713, "grad_norm": 1.5880811214447021, "learning_rate": 0.00010309064470070414, "loss": 1.5384, "step": 14114 }, { "epoch": 0.5054882088563396, "grad_norm": 1.6013243198394775, "learning_rate": 0.00010307905120806549, "loss": 1.4457, "step": 14115 }, { "epoch": 0.5055240209859079, "grad_norm": 1.7990401983261108, "learning_rate": 0.00010306745767400219, "loss": 1.4054, "step": 14116 }, { "epoch": 0.5055598331154763, "grad_norm": 2.4490861892700195, "learning_rate": 0.0001030558640986702, "loss": 1.6798, "step": 14117 }, { "epoch": 0.5055956452450445, "grad_norm": 1.4762895107269287, "learning_rate": 0.0001030442704822255, "loss": 1.1657, "step": 14118 }, { "epoch": 0.5056314573746128, "grad_norm": 1.4917511940002441, "learning_rate": 0.00010303267682482405, "loss": 1.4921, "step": 14119 }, { "epoch": 0.5056672695041811, "grad_norm": 1.858914852142334, "learning_rate": 0.00010302108312662184, "loss": 1.2912, "step": 14120 }, { "epoch": 0.5057030816337493, "grad_norm": 2.0417349338531494, "learning_rate": 0.00010300948938777491, "loss": 1.7917, "step": 14121 }, { "epoch": 0.5057388937633176, "grad_norm": 1.6435681581497192, "learning_rate": 0.00010299789560843911, "loss": 1.3145, "step": 14122 }, { "epoch": 0.5057747058928859, "grad_norm": 1.7804651260375977, "learning_rate": 0.00010298630178877053, "loss": 1.4567, "step": 14123 }, { "epoch": 0.5058105180224542, "grad_norm": 1.9142791032791138, "learning_rate": 0.00010297470792892512, "loss": 1.1872, "step": 14124 }, { "epoch": 0.5058463301520225, "grad_norm": 2.6280245780944824, "learning_rate": 0.00010296311402905884, "loss": 1.3906, "step": 14125 }, { "epoch": 0.5058821422815908, "grad_norm": 1.2249655723571777, "learning_rate": 0.0001029515200893277, "loss": 1.4255, "step": 14126 }, { "epoch": 0.505917954411159, "grad_norm": 1.620898962020874, "learning_rate": 0.00010293992610988763, "loss": 1.3322, "step": 14127 }, { "epoch": 0.5059537665407273, "grad_norm": 1.8030281066894531, "learning_rate": 0.00010292833209089467, "loss": 1.2237, "step": 14128 }, { "epoch": 0.5059895786702956, "grad_norm": 1.8104889392852783, "learning_rate": 0.00010291673803250477, "loss": 1.4957, "step": 14129 }, { "epoch": 0.5060253907998639, "grad_norm": 1.5003832578659058, "learning_rate": 0.00010290514393487391, "loss": 1.5665, "step": 14130 }, { "epoch": 0.5060612029294322, "grad_norm": 1.9296363592147827, "learning_rate": 0.00010289354979815811, "loss": 1.5859, "step": 14131 }, { "epoch": 0.5060970150590005, "grad_norm": 1.9950153827667236, "learning_rate": 0.00010288195562251332, "loss": 1.5064, "step": 14132 }, { "epoch": 0.5061328271885688, "grad_norm": 2.030654191970825, "learning_rate": 0.00010287036140809552, "loss": 1.5293, "step": 14133 }, { "epoch": 0.506168639318137, "grad_norm": 1.89315664768219, "learning_rate": 0.00010285876715506076, "loss": 1.3902, "step": 14134 }, { "epoch": 0.5062044514477053, "grad_norm": 1.6932226419448853, "learning_rate": 0.00010284717286356493, "loss": 1.3832, "step": 14135 }, { "epoch": 0.5062402635772736, "grad_norm": 1.2903110980987549, "learning_rate": 0.00010283557853376408, "loss": 1.3911, "step": 14136 }, { "epoch": 0.5062760757068419, "grad_norm": 1.9288479089736938, "learning_rate": 0.00010282398416581415, "loss": 1.7958, "step": 14137 }, { "epoch": 0.5063118878364102, "grad_norm": 1.6040328741073608, "learning_rate": 0.00010281238975987118, "loss": 1.5238, "step": 14138 }, { "epoch": 0.5063476999659785, "grad_norm": 2.265596866607666, "learning_rate": 0.00010280079531609112, "loss": 1.4235, "step": 14139 }, { "epoch": 0.5063835120955468, "grad_norm": 1.681878685951233, "learning_rate": 0.00010278920083462997, "loss": 1.6886, "step": 14140 }, { "epoch": 0.506419324225115, "grad_norm": 2.133225917816162, "learning_rate": 0.00010277760631564375, "loss": 1.2276, "step": 14141 }, { "epoch": 0.5064551363546833, "grad_norm": 1.6065014600753784, "learning_rate": 0.00010276601175928839, "loss": 1.4719, "step": 14142 }, { "epoch": 0.5064909484842516, "grad_norm": 1.765504240989685, "learning_rate": 0.00010275441716571996, "loss": 1.3059, "step": 14143 }, { "epoch": 0.5065267606138198, "grad_norm": 1.4708679914474487, "learning_rate": 0.00010274282253509436, "loss": 1.4785, "step": 14144 }, { "epoch": 0.5065625727433882, "grad_norm": 1.4955041408538818, "learning_rate": 0.00010273122786756762, "loss": 1.5128, "step": 14145 }, { "epoch": 0.5065983848729565, "grad_norm": 1.961424469947815, "learning_rate": 0.00010271963316329571, "loss": 1.4041, "step": 14146 }, { "epoch": 0.5066341970025248, "grad_norm": 1.4497519731521606, "learning_rate": 0.00010270803842243469, "loss": 1.7535, "step": 14147 }, { "epoch": 0.506670009132093, "grad_norm": 1.6153457164764404, "learning_rate": 0.00010269644364514046, "loss": 1.5254, "step": 14148 }, { "epoch": 0.5067058212616613, "grad_norm": 1.7884387969970703, "learning_rate": 0.0001026848488315691, "loss": 1.5087, "step": 14149 }, { "epoch": 0.5067416333912296, "grad_norm": 1.7952286005020142, "learning_rate": 0.00010267325398187653, "loss": 1.4182, "step": 14150 }, { "epoch": 0.5067774455207978, "grad_norm": 2.6730382442474365, "learning_rate": 0.00010266165909621879, "loss": 1.722, "step": 14151 }, { "epoch": 0.5068132576503662, "grad_norm": 1.7425639629364014, "learning_rate": 0.00010265006417475189, "loss": 1.6575, "step": 14152 }, { "epoch": 0.5068490697799345, "grad_norm": 1.971174955368042, "learning_rate": 0.00010263846921763174, "loss": 1.3717, "step": 14153 }, { "epoch": 0.5068848819095028, "grad_norm": 1.8181272745132446, "learning_rate": 0.00010262687422501442, "loss": 1.2995, "step": 14154 }, { "epoch": 0.506920694039071, "grad_norm": 1.9725772142410278, "learning_rate": 0.00010261527919705589, "loss": 1.7611, "step": 14155 }, { "epoch": 0.5069565061686393, "grad_norm": 2.269573926925659, "learning_rate": 0.00010260368413391217, "loss": 1.5659, "step": 14156 }, { "epoch": 0.5069923182982076, "grad_norm": 1.5113763809204102, "learning_rate": 0.0001025920890357392, "loss": 1.4647, "step": 14157 }, { "epoch": 0.5070281304277758, "grad_norm": 1.9952131509780884, "learning_rate": 0.00010258049390269305, "loss": 1.5054, "step": 14158 }, { "epoch": 0.5070639425573442, "grad_norm": 1.6506963968276978, "learning_rate": 0.00010256889873492966, "loss": 1.5954, "step": 14159 }, { "epoch": 0.5070997546869125, "grad_norm": 1.6681649684906006, "learning_rate": 0.00010255730353260507, "loss": 1.2699, "step": 14160 }, { "epoch": 0.5071355668164808, "grad_norm": 1.7544012069702148, "learning_rate": 0.00010254570829587527, "loss": 1.2456, "step": 14161 }, { "epoch": 0.507171378946049, "grad_norm": 1.9082049131393433, "learning_rate": 0.00010253411302489622, "loss": 1.3864, "step": 14162 }, { "epoch": 0.5072071910756173, "grad_norm": 5.814779758453369, "learning_rate": 0.00010252251771982395, "loss": 1.5455, "step": 14163 }, { "epoch": 0.5072430032051856, "grad_norm": 1.5114103555679321, "learning_rate": 0.00010251092238081446, "loss": 1.7425, "step": 14164 }, { "epoch": 0.5072788153347538, "grad_norm": 1.5640950202941895, "learning_rate": 0.00010249932700802376, "loss": 1.5903, "step": 14165 }, { "epoch": 0.5073146274643222, "grad_norm": 2.070931911468506, "learning_rate": 0.00010248773160160782, "loss": 1.5774, "step": 14166 }, { "epoch": 0.5073504395938905, "grad_norm": 1.4230797290802002, "learning_rate": 0.0001024761361617227, "loss": 1.6921, "step": 14167 }, { "epoch": 0.5073862517234587, "grad_norm": 1.4877146482467651, "learning_rate": 0.00010246454068852431, "loss": 1.3322, "step": 14168 }, { "epoch": 0.507422063853027, "grad_norm": 2.0262210369110107, "learning_rate": 0.00010245294518216875, "loss": 1.4121, "step": 14169 }, { "epoch": 0.5074578759825953, "grad_norm": 1.7050576210021973, "learning_rate": 0.00010244134964281195, "loss": 1.336, "step": 14170 }, { "epoch": 0.5074936881121636, "grad_norm": 2.0105693340301514, "learning_rate": 0.00010242975407060995, "loss": 1.2359, "step": 14171 }, { "epoch": 0.5075295002417318, "grad_norm": 1.4477938413619995, "learning_rate": 0.00010241815846571874, "loss": 1.7411, "step": 14172 }, { "epoch": 0.5075653123713002, "grad_norm": 1.3273900747299194, "learning_rate": 0.00010240656282829433, "loss": 1.434, "step": 14173 }, { "epoch": 0.5076011245008685, "grad_norm": 1.914575457572937, "learning_rate": 0.00010239496715849273, "loss": 1.2457, "step": 14174 }, { "epoch": 0.5076369366304367, "grad_norm": 1.6995282173156738, "learning_rate": 0.0001023833714564699, "loss": 1.5573, "step": 14175 }, { "epoch": 0.507672748760005, "grad_norm": 1.8010412454605103, "learning_rate": 0.00010237177572238192, "loss": 1.5153, "step": 14176 }, { "epoch": 0.5077085608895733, "grad_norm": 1.5251318216323853, "learning_rate": 0.00010236017995638472, "loss": 1.5024, "step": 14177 }, { "epoch": 0.5077443730191415, "grad_norm": 3.0716559886932373, "learning_rate": 0.00010234858415863439, "loss": 1.5426, "step": 14178 }, { "epoch": 0.5077801851487098, "grad_norm": 2.010403633117676, "learning_rate": 0.00010233698832928686, "loss": 1.3712, "step": 14179 }, { "epoch": 0.5078159972782782, "grad_norm": 1.76226806640625, "learning_rate": 0.00010232539246849818, "loss": 1.4153, "step": 14180 }, { "epoch": 0.5078518094078465, "grad_norm": 1.8289686441421509, "learning_rate": 0.00010231379657642432, "loss": 1.4113, "step": 14181 }, { "epoch": 0.5078876215374147, "grad_norm": 1.5149344205856323, "learning_rate": 0.00010230220065322132, "loss": 1.4344, "step": 14182 }, { "epoch": 0.507923433666983, "grad_norm": 1.530060052871704, "learning_rate": 0.00010229060469904519, "loss": 1.0206, "step": 14183 }, { "epoch": 0.5079592457965513, "grad_norm": 2.5815203189849854, "learning_rate": 0.00010227900871405191, "loss": 1.6194, "step": 14184 }, { "epoch": 0.5079950579261195, "grad_norm": 1.5144827365875244, "learning_rate": 0.00010226741269839755, "loss": 1.377, "step": 14185 }, { "epoch": 0.5080308700556878, "grad_norm": 1.7543262243270874, "learning_rate": 0.00010225581665223802, "loss": 1.4925, "step": 14186 }, { "epoch": 0.5080666821852562, "grad_norm": 1.8062946796417236, "learning_rate": 0.00010224422057572947, "loss": 1.6961, "step": 14187 }, { "epoch": 0.5081024943148245, "grad_norm": 1.5800213813781738, "learning_rate": 0.00010223262446902775, "loss": 1.6731, "step": 14188 }, { "epoch": 0.5081383064443927, "grad_norm": 1.6881344318389893, "learning_rate": 0.00010222102833228897, "loss": 1.488, "step": 14189 }, { "epoch": 0.508174118573961, "grad_norm": 1.4703749418258667, "learning_rate": 0.00010220943216566912, "loss": 1.2877, "step": 14190 }, { "epoch": 0.5082099307035293, "grad_norm": 2.940575361251831, "learning_rate": 0.00010219783596932421, "loss": 1.3642, "step": 14191 }, { "epoch": 0.5082457428330975, "grad_norm": 1.343244194984436, "learning_rate": 0.00010218623974341024, "loss": 1.5526, "step": 14192 }, { "epoch": 0.5082815549626658, "grad_norm": 1.7862319946289062, "learning_rate": 0.00010217464348808323, "loss": 1.5906, "step": 14193 }, { "epoch": 0.5083173670922342, "grad_norm": 2.7882981300354004, "learning_rate": 0.00010216304720349922, "loss": 1.6598, "step": 14194 }, { "epoch": 0.5083531792218025, "grad_norm": 1.9673621654510498, "learning_rate": 0.00010215145088981419, "loss": 1.6815, "step": 14195 }, { "epoch": 0.5083889913513707, "grad_norm": 1.8103598356246948, "learning_rate": 0.0001021398545471842, "loss": 1.2642, "step": 14196 }, { "epoch": 0.508424803480939, "grad_norm": 1.8523528575897217, "learning_rate": 0.00010212825817576519, "loss": 1.7025, "step": 14197 }, { "epoch": 0.5084606156105073, "grad_norm": 1.207170009613037, "learning_rate": 0.00010211666177571322, "loss": 1.4524, "step": 14198 }, { "epoch": 0.5084964277400755, "grad_norm": 1.7149916887283325, "learning_rate": 0.00010210506534718427, "loss": 1.4284, "step": 14199 }, { "epoch": 0.5085322398696438, "grad_norm": 1.2960801124572754, "learning_rate": 0.00010209346889033442, "loss": 1.4645, "step": 14200 }, { "epoch": 0.5085680519992122, "grad_norm": 1.768568754196167, "learning_rate": 0.00010208187240531962, "loss": 1.2931, "step": 14201 }, { "epoch": 0.5086038641287804, "grad_norm": 1.9338597059249878, "learning_rate": 0.00010207027589229594, "loss": 1.6961, "step": 14202 }, { "epoch": 0.5086396762583487, "grad_norm": 1.3252862691879272, "learning_rate": 0.00010205867935141933, "loss": 1.5093, "step": 14203 }, { "epoch": 0.508675488387917, "grad_norm": 1.6431336402893066, "learning_rate": 0.00010204708278284587, "loss": 1.4769, "step": 14204 }, { "epoch": 0.5087113005174853, "grad_norm": 1.6076064109802246, "learning_rate": 0.00010203548618673155, "loss": 1.5178, "step": 14205 }, { "epoch": 0.5087471126470535, "grad_norm": 1.594680905342102, "learning_rate": 0.00010202388956323238, "loss": 1.4088, "step": 14206 }, { "epoch": 0.5087829247766218, "grad_norm": 1.4047375917434692, "learning_rate": 0.0001020122929125044, "loss": 1.4558, "step": 14207 }, { "epoch": 0.5088187369061902, "grad_norm": 1.6259403228759766, "learning_rate": 0.00010200069623470358, "loss": 1.2941, "step": 14208 }, { "epoch": 0.5088545490357584, "grad_norm": 1.942922830581665, "learning_rate": 0.00010198909952998603, "loss": 1.6474, "step": 14209 }, { "epoch": 0.5088903611653267, "grad_norm": 1.5637397766113281, "learning_rate": 0.00010197750279850767, "loss": 1.4758, "step": 14210 }, { "epoch": 0.508926173294895, "grad_norm": 1.5786371231079102, "learning_rate": 0.00010196590604042457, "loss": 1.3429, "step": 14211 }, { "epoch": 0.5089619854244632, "grad_norm": 1.7941700220108032, "learning_rate": 0.00010195430925589274, "loss": 1.4954, "step": 14212 }, { "epoch": 0.5089977975540315, "grad_norm": 1.7135039567947388, "learning_rate": 0.00010194271244506821, "loss": 1.5166, "step": 14213 }, { "epoch": 0.5090336096835998, "grad_norm": 1.4030609130859375, "learning_rate": 0.00010193111560810697, "loss": 1.5622, "step": 14214 }, { "epoch": 0.5090694218131682, "grad_norm": 2.290910005569458, "learning_rate": 0.00010191951874516508, "loss": 1.1519, "step": 14215 }, { "epoch": 0.5091052339427364, "grad_norm": 2.0241501331329346, "learning_rate": 0.00010190792185639855, "loss": 1.575, "step": 14216 }, { "epoch": 0.5091410460723047, "grad_norm": 1.8566899299621582, "learning_rate": 0.00010189632494196335, "loss": 1.5069, "step": 14217 }, { "epoch": 0.509176858201873, "grad_norm": 1.780731201171875, "learning_rate": 0.00010188472800201558, "loss": 1.4822, "step": 14218 }, { "epoch": 0.5092126703314412, "grad_norm": 1.620386004447937, "learning_rate": 0.00010187313103671122, "loss": 1.4713, "step": 14219 }, { "epoch": 0.5092484824610095, "grad_norm": 1.4523818492889404, "learning_rate": 0.00010186153404620628, "loss": 1.557, "step": 14220 }, { "epoch": 0.5092842945905778, "grad_norm": 1.6923600435256958, "learning_rate": 0.00010184993703065682, "loss": 1.4402, "step": 14221 }, { "epoch": 0.5093201067201462, "grad_norm": 1.507537841796875, "learning_rate": 0.00010183833999021884, "loss": 1.3889, "step": 14222 }, { "epoch": 0.5093559188497144, "grad_norm": 1.9163919687271118, "learning_rate": 0.00010182674292504837, "loss": 1.5374, "step": 14223 }, { "epoch": 0.5093917309792827, "grad_norm": 2.0340425968170166, "learning_rate": 0.00010181514583530141, "loss": 1.4581, "step": 14224 }, { "epoch": 0.509427543108851, "grad_norm": 2.15578293800354, "learning_rate": 0.00010180354872113403, "loss": 1.6459, "step": 14225 }, { "epoch": 0.5094633552384192, "grad_norm": 1.6318968534469604, "learning_rate": 0.0001017919515827022, "loss": 1.5743, "step": 14226 }, { "epoch": 0.5094991673679875, "grad_norm": 1.603484034538269, "learning_rate": 0.000101780354420162, "loss": 1.4433, "step": 14227 }, { "epoch": 0.5095349794975558, "grad_norm": 1.2180871963500977, "learning_rate": 0.00010176875723366941, "loss": 1.3916, "step": 14228 }, { "epoch": 0.5095707916271242, "grad_norm": 1.6632503271102905, "learning_rate": 0.00010175716002338049, "loss": 1.3513, "step": 14229 }, { "epoch": 0.5096066037566924, "grad_norm": 1.5550673007965088, "learning_rate": 0.00010174556278945123, "loss": 1.3926, "step": 14230 }, { "epoch": 0.5096424158862607, "grad_norm": 1.7034988403320312, "learning_rate": 0.00010173396553203771, "loss": 1.4134, "step": 14231 }, { "epoch": 0.509678228015829, "grad_norm": 2.4369795322418213, "learning_rate": 0.00010172236825129588, "loss": 1.4433, "step": 14232 }, { "epoch": 0.5097140401453972, "grad_norm": 1.695494532585144, "learning_rate": 0.00010171077094738183, "loss": 1.6308, "step": 14233 }, { "epoch": 0.5097498522749655, "grad_norm": 1.9752106666564941, "learning_rate": 0.00010169917362045154, "loss": 1.4111, "step": 14234 }, { "epoch": 0.5097856644045338, "grad_norm": 1.5427497625350952, "learning_rate": 0.00010168757627066105, "loss": 1.403, "step": 14235 }, { "epoch": 0.5098214765341021, "grad_norm": 1.7571707963943481, "learning_rate": 0.00010167597889816644, "loss": 1.5686, "step": 14236 }, { "epoch": 0.5098572886636704, "grad_norm": 1.598278522491455, "learning_rate": 0.00010166438150312367, "loss": 1.6431, "step": 14237 }, { "epoch": 0.5098931007932387, "grad_norm": 1.8703125715255737, "learning_rate": 0.00010165278408568881, "loss": 1.3825, "step": 14238 }, { "epoch": 0.509928912922807, "grad_norm": 1.4079030752182007, "learning_rate": 0.00010164118664601785, "loss": 1.5527, "step": 14239 }, { "epoch": 0.5099647250523752, "grad_norm": 1.8269530534744263, "learning_rate": 0.0001016295891842669, "loss": 1.6979, "step": 14240 }, { "epoch": 0.5100005371819435, "grad_norm": 1.5141544342041016, "learning_rate": 0.00010161799170059187, "loss": 1.4384, "step": 14241 }, { "epoch": 0.5100363493115118, "grad_norm": 1.7513827085494995, "learning_rate": 0.00010160639419514888, "loss": 1.094, "step": 14242 }, { "epoch": 0.5100721614410801, "grad_norm": 1.3838261365890503, "learning_rate": 0.00010159479666809388, "loss": 1.3501, "step": 14243 }, { "epoch": 0.5101079735706484, "grad_norm": 1.4064701795578003, "learning_rate": 0.00010158319911958301, "loss": 1.5364, "step": 14244 }, { "epoch": 0.5101437857002167, "grad_norm": 1.7723150253295898, "learning_rate": 0.00010157160154977219, "loss": 1.2483, "step": 14245 }, { "epoch": 0.510179597829785, "grad_norm": 1.6301299333572388, "learning_rate": 0.00010156000395881752, "loss": 1.6533, "step": 14246 }, { "epoch": 0.5102154099593532, "grad_norm": 1.759650707244873, "learning_rate": 0.000101548406346875, "loss": 1.5664, "step": 14247 }, { "epoch": 0.5102512220889215, "grad_norm": 1.7729034423828125, "learning_rate": 0.00010153680871410065, "loss": 1.5183, "step": 14248 }, { "epoch": 0.5102870342184898, "grad_norm": 1.9804837703704834, "learning_rate": 0.00010152521106065058, "loss": 1.4567, "step": 14249 }, { "epoch": 0.5103228463480581, "grad_norm": 2.063938856124878, "learning_rate": 0.00010151361338668072, "loss": 1.6644, "step": 14250 }, { "epoch": 0.5103586584776264, "grad_norm": 1.628487467765808, "learning_rate": 0.00010150201569234717, "loss": 1.1544, "step": 14251 }, { "epoch": 0.5103944706071947, "grad_norm": 2.041860580444336, "learning_rate": 0.0001014904179778059, "loss": 1.6834, "step": 14252 }, { "epoch": 0.5104302827367629, "grad_norm": 1.646464467048645, "learning_rate": 0.000101478820243213, "loss": 1.7831, "step": 14253 }, { "epoch": 0.5104660948663312, "grad_norm": 1.8969465494155884, "learning_rate": 0.00010146722248872446, "loss": 1.4377, "step": 14254 }, { "epoch": 0.5105019069958995, "grad_norm": 1.9447615146636963, "learning_rate": 0.00010145562471449638, "loss": 1.4619, "step": 14255 }, { "epoch": 0.5105377191254677, "grad_norm": 1.6894398927688599, "learning_rate": 0.00010144402692068472, "loss": 1.6591, "step": 14256 }, { "epoch": 0.5105735312550361, "grad_norm": 2.469947576522827, "learning_rate": 0.00010143242910744555, "loss": 1.587, "step": 14257 }, { "epoch": 0.5106093433846044, "grad_norm": 1.7453415393829346, "learning_rate": 0.00010142083127493489, "loss": 1.5956, "step": 14258 }, { "epoch": 0.5106451555141727, "grad_norm": 1.6180731058120728, "learning_rate": 0.00010140923342330875, "loss": 1.5102, "step": 14259 }, { "epoch": 0.5106809676437409, "grad_norm": 2.149956226348877, "learning_rate": 0.00010139763555272323, "loss": 1.8065, "step": 14260 }, { "epoch": 0.5107167797733092, "grad_norm": 1.7655017375946045, "learning_rate": 0.0001013860376633343, "loss": 1.1825, "step": 14261 }, { "epoch": 0.5107525919028775, "grad_norm": 1.5514721870422363, "learning_rate": 0.00010137443975529804, "loss": 1.3279, "step": 14262 }, { "epoch": 0.5107884040324457, "grad_norm": 1.7187747955322266, "learning_rate": 0.00010136284182877045, "loss": 1.4676, "step": 14263 }, { "epoch": 0.5108242161620141, "grad_norm": 1.7656067609786987, "learning_rate": 0.0001013512438839076, "loss": 1.5519, "step": 14264 }, { "epoch": 0.5108600282915824, "grad_norm": 1.3366622924804688, "learning_rate": 0.00010133964592086547, "loss": 1.3903, "step": 14265 }, { "epoch": 0.5108958404211507, "grad_norm": 2.655071973800659, "learning_rate": 0.00010132804793980018, "loss": 1.3475, "step": 14266 }, { "epoch": 0.5109316525507189, "grad_norm": 1.4642434120178223, "learning_rate": 0.0001013164499408677, "loss": 1.736, "step": 14267 }, { "epoch": 0.5109674646802872, "grad_norm": 1.510628342628479, "learning_rate": 0.00010130485192422408, "loss": 1.3315, "step": 14268 }, { "epoch": 0.5110032768098555, "grad_norm": 1.7870886325836182, "learning_rate": 0.00010129325389002536, "loss": 1.4371, "step": 14269 }, { "epoch": 0.5110390889394237, "grad_norm": 1.452154517173767, "learning_rate": 0.00010128165583842757, "loss": 1.5644, "step": 14270 }, { "epoch": 0.5110749010689921, "grad_norm": 1.9587507247924805, "learning_rate": 0.00010127005776958676, "loss": 1.5341, "step": 14271 }, { "epoch": 0.5111107131985604, "grad_norm": 1.3689603805541992, "learning_rate": 0.00010125845968365895, "loss": 1.3541, "step": 14272 }, { "epoch": 0.5111465253281287, "grad_norm": 2.153557300567627, "learning_rate": 0.00010124686158080021, "loss": 1.5126, "step": 14273 }, { "epoch": 0.5111823374576969, "grad_norm": 1.3762892484664917, "learning_rate": 0.00010123526346116654, "loss": 1.5685, "step": 14274 }, { "epoch": 0.5112181495872652, "grad_norm": 1.7966989278793335, "learning_rate": 0.00010122366532491403, "loss": 1.2602, "step": 14275 }, { "epoch": 0.5112539617168335, "grad_norm": 1.1151483058929443, "learning_rate": 0.00010121206717219865, "loss": 1.4299, "step": 14276 }, { "epoch": 0.5112897738464017, "grad_norm": 1.83100163936615, "learning_rate": 0.00010120046900317646, "loss": 1.7361, "step": 14277 }, { "epoch": 0.5113255859759701, "grad_norm": 1.368430495262146, "learning_rate": 0.00010118887081800352, "loss": 1.5495, "step": 14278 }, { "epoch": 0.5113613981055384, "grad_norm": 1.5631763935089111, "learning_rate": 0.00010117727261683585, "loss": 1.3579, "step": 14279 }, { "epoch": 0.5113972102351066, "grad_norm": 3.0901801586151123, "learning_rate": 0.00010116567439982952, "loss": 1.7898, "step": 14280 }, { "epoch": 0.5114330223646749, "grad_norm": 1.6391278505325317, "learning_rate": 0.0001011540761671405, "loss": 1.1463, "step": 14281 }, { "epoch": 0.5114688344942432, "grad_norm": 1.5354077816009521, "learning_rate": 0.00010114247791892491, "loss": 1.4781, "step": 14282 }, { "epoch": 0.5115046466238115, "grad_norm": 2.2333121299743652, "learning_rate": 0.00010113087965533874, "loss": 1.5416, "step": 14283 }, { "epoch": 0.5115404587533797, "grad_norm": 1.9875568151474, "learning_rate": 0.00010111928137653808, "loss": 1.677, "step": 14284 }, { "epoch": 0.5115762708829481, "grad_norm": 1.5071085691452026, "learning_rate": 0.00010110768308267889, "loss": 1.2818, "step": 14285 }, { "epoch": 0.5116120830125164, "grad_norm": 1.5551725625991821, "learning_rate": 0.00010109608477391725, "loss": 1.5007, "step": 14286 }, { "epoch": 0.5116478951420846, "grad_norm": 2.123044967651367, "learning_rate": 0.00010108448645040919, "loss": 1.5999, "step": 14287 }, { "epoch": 0.5116837072716529, "grad_norm": 1.681573748588562, "learning_rate": 0.00010107288811231081, "loss": 1.2607, "step": 14288 }, { "epoch": 0.5117195194012212, "grad_norm": 1.9159537553787231, "learning_rate": 0.00010106128975977809, "loss": 1.5472, "step": 14289 }, { "epoch": 0.5117553315307894, "grad_norm": 1.6600178480148315, "learning_rate": 0.00010104969139296705, "loss": 1.6619, "step": 14290 }, { "epoch": 0.5117911436603577, "grad_norm": 1.5200772285461426, "learning_rate": 0.00010103809301203382, "loss": 1.4235, "step": 14291 }, { "epoch": 0.5118269557899261, "grad_norm": 1.593790054321289, "learning_rate": 0.00010102649461713434, "loss": 1.496, "step": 14292 }, { "epoch": 0.5118627679194944, "grad_norm": 1.5583328008651733, "learning_rate": 0.00010101489620842475, "loss": 1.5407, "step": 14293 }, { "epoch": 0.5118985800490626, "grad_norm": 1.877834677696228, "learning_rate": 0.00010100329778606101, "loss": 1.7199, "step": 14294 }, { "epoch": 0.5119343921786309, "grad_norm": 1.555271029472351, "learning_rate": 0.0001009916993501992, "loss": 1.4016, "step": 14295 }, { "epoch": 0.5119702043081992, "grad_norm": 1.6726455688476562, "learning_rate": 0.00010098010090099532, "loss": 1.306, "step": 14296 }, { "epoch": 0.5120060164377674, "grad_norm": 1.5355541706085205, "learning_rate": 0.00010096850243860549, "loss": 1.3042, "step": 14297 }, { "epoch": 0.5120418285673357, "grad_norm": 1.3412790298461914, "learning_rate": 0.00010095690396318569, "loss": 1.529, "step": 14298 }, { "epoch": 0.5120776406969041, "grad_norm": 2.1176607608795166, "learning_rate": 0.00010094530547489201, "loss": 1.731, "step": 14299 }, { "epoch": 0.5121134528264724, "grad_norm": 1.660214900970459, "learning_rate": 0.0001009337069738804, "loss": 1.5807, "step": 14300 }, { "epoch": 0.5121492649560406, "grad_norm": 1.5404260158538818, "learning_rate": 0.00010092210846030703, "loss": 1.3894, "step": 14301 }, { "epoch": 0.5121850770856089, "grad_norm": 1.514736294746399, "learning_rate": 0.00010091050993432787, "loss": 1.4389, "step": 14302 }, { "epoch": 0.5122208892151772, "grad_norm": 2.2876694202423096, "learning_rate": 0.00010089891139609895, "loss": 1.441, "step": 14303 }, { "epoch": 0.5122567013447454, "grad_norm": 1.7587552070617676, "learning_rate": 0.00010088731284577636, "loss": 1.4818, "step": 14304 }, { "epoch": 0.5122925134743137, "grad_norm": 1.566940426826477, "learning_rate": 0.0001008757142835161, "loss": 1.3247, "step": 14305 }, { "epoch": 0.5123283256038821, "grad_norm": 1.3367424011230469, "learning_rate": 0.00010086411570947424, "loss": 1.6241, "step": 14306 }, { "epoch": 0.5123641377334504, "grad_norm": 1.3539397716522217, "learning_rate": 0.0001008525171238068, "loss": 1.5768, "step": 14307 }, { "epoch": 0.5123999498630186, "grad_norm": 1.3885242938995361, "learning_rate": 0.00010084091852666988, "loss": 1.1827, "step": 14308 }, { "epoch": 0.5124357619925869, "grad_norm": 2.392909288406372, "learning_rate": 0.00010082931991821945, "loss": 1.4581, "step": 14309 }, { "epoch": 0.5124715741221552, "grad_norm": 1.3797001838684082, "learning_rate": 0.00010081772129861163, "loss": 1.6449, "step": 14310 }, { "epoch": 0.5125073862517234, "grad_norm": 1.8504879474639893, "learning_rate": 0.00010080612266800241, "loss": 1.502, "step": 14311 }, { "epoch": 0.5125431983812917, "grad_norm": 1.6124776601791382, "learning_rate": 0.0001007945240265478, "loss": 1.6722, "step": 14312 }, { "epoch": 0.5125790105108601, "grad_norm": 1.4799977540969849, "learning_rate": 0.00010078292537440397, "loss": 2.0444, "step": 14313 }, { "epoch": 0.5126148226404283, "grad_norm": 1.6208722591400146, "learning_rate": 0.00010077132671172685, "loss": 1.4614, "step": 14314 }, { "epoch": 0.5126506347699966, "grad_norm": 1.8112127780914307, "learning_rate": 0.00010075972803867254, "loss": 1.4982, "step": 14315 }, { "epoch": 0.5126864468995649, "grad_norm": 1.5253667831420898, "learning_rate": 0.00010074812935539703, "loss": 1.5199, "step": 14316 }, { "epoch": 0.5127222590291332, "grad_norm": 1.505617618560791, "learning_rate": 0.00010073653066205644, "loss": 1.5824, "step": 14317 }, { "epoch": 0.5127580711587014, "grad_norm": 1.8238226175308228, "learning_rate": 0.00010072493195880676, "loss": 1.239, "step": 14318 }, { "epoch": 0.5127938832882697, "grad_norm": 1.3497518301010132, "learning_rate": 0.00010071333324580408, "loss": 1.4374, "step": 14319 }, { "epoch": 0.5128296954178381, "grad_norm": 1.3062442541122437, "learning_rate": 0.00010070173452320442, "loss": 1.2157, "step": 14320 }, { "epoch": 0.5128655075474063, "grad_norm": 1.9949729442596436, "learning_rate": 0.0001006901357911638, "loss": 1.4122, "step": 14321 }, { "epoch": 0.5129013196769746, "grad_norm": 1.8121817111968994, "learning_rate": 0.00010067853704983832, "loss": 1.277, "step": 14322 }, { "epoch": 0.5129371318065429, "grad_norm": 1.786101222038269, "learning_rate": 0.00010066693829938398, "loss": 1.7341, "step": 14323 }, { "epoch": 0.5129729439361111, "grad_norm": 1.6490147113800049, "learning_rate": 0.00010065533953995688, "loss": 1.5394, "step": 14324 }, { "epoch": 0.5130087560656794, "grad_norm": 2.683562755584717, "learning_rate": 0.00010064374077171296, "loss": 1.633, "step": 14325 }, { "epoch": 0.5130445681952477, "grad_norm": 1.9112385511398315, "learning_rate": 0.00010063214199480842, "loss": 1.6243, "step": 14326 }, { "epoch": 0.5130803803248161, "grad_norm": 1.7935097217559814, "learning_rate": 0.00010062054320939916, "loss": 1.1181, "step": 14327 }, { "epoch": 0.5131161924543843, "grad_norm": 1.3657851219177246, "learning_rate": 0.00010060894441564135, "loss": 1.6137, "step": 14328 }, { "epoch": 0.5131520045839526, "grad_norm": 1.3997024297714233, "learning_rate": 0.00010059734561369095, "loss": 1.4953, "step": 14329 }, { "epoch": 0.5131878167135209, "grad_norm": 1.8069899082183838, "learning_rate": 0.00010058574680370403, "loss": 1.5614, "step": 14330 }, { "epoch": 0.5132236288430891, "grad_norm": 1.4796655178070068, "learning_rate": 0.00010057414798583664, "loss": 1.5437, "step": 14331 }, { "epoch": 0.5132594409726574, "grad_norm": 1.5718828439712524, "learning_rate": 0.00010056254916024483, "loss": 1.4086, "step": 14332 }, { "epoch": 0.5132952531022257, "grad_norm": 2.2739932537078857, "learning_rate": 0.00010055095032708466, "loss": 1.4159, "step": 14333 }, { "epoch": 0.5133310652317941, "grad_norm": 1.714881181716919, "learning_rate": 0.00010053935148651214, "loss": 1.4963, "step": 14334 }, { "epoch": 0.5133668773613623, "grad_norm": 1.6868878602981567, "learning_rate": 0.00010052775263868337, "loss": 1.2638, "step": 14335 }, { "epoch": 0.5134026894909306, "grad_norm": 1.6076358556747437, "learning_rate": 0.00010051615378375434, "loss": 1.7306, "step": 14336 }, { "epoch": 0.5134385016204989, "grad_norm": 1.4571819305419922, "learning_rate": 0.00010050455492188118, "loss": 1.7523, "step": 14337 }, { "epoch": 0.5134743137500671, "grad_norm": 1.9559829235076904, "learning_rate": 0.00010049295605321984, "loss": 1.1767, "step": 14338 }, { "epoch": 0.5135101258796354, "grad_norm": 1.8952054977416992, "learning_rate": 0.00010048135717792641, "loss": 1.3615, "step": 14339 }, { "epoch": 0.5135459380092037, "grad_norm": 3.2473466396331787, "learning_rate": 0.00010046975829615695, "loss": 1.7416, "step": 14340 }, { "epoch": 0.513581750138772, "grad_norm": 1.5299546718597412, "learning_rate": 0.00010045815940806751, "loss": 1.5168, "step": 14341 }, { "epoch": 0.5136175622683403, "grad_norm": 1.6583263874053955, "learning_rate": 0.00010044656051381411, "loss": 1.4021, "step": 14342 }, { "epoch": 0.5136533743979086, "grad_norm": 1.6013065576553345, "learning_rate": 0.00010043496161355282, "loss": 1.6424, "step": 14343 }, { "epoch": 0.5136891865274769, "grad_norm": 1.4253227710723877, "learning_rate": 0.00010042336270743968, "loss": 1.6771, "step": 14344 }, { "epoch": 0.5137249986570451, "grad_norm": 2.0314788818359375, "learning_rate": 0.00010041176379563073, "loss": 1.6002, "step": 14345 }, { "epoch": 0.5137608107866134, "grad_norm": 1.651528239250183, "learning_rate": 0.00010040016487828208, "loss": 1.6224, "step": 14346 }, { "epoch": 0.5137966229161817, "grad_norm": 1.8376193046569824, "learning_rate": 0.00010038856595554967, "loss": 1.4108, "step": 14347 }, { "epoch": 0.51383243504575, "grad_norm": 2.1696889400482178, "learning_rate": 0.00010037696702758963, "loss": 1.3069, "step": 14348 }, { "epoch": 0.5138682471753183, "grad_norm": 1.4069865942001343, "learning_rate": 0.00010036536809455796, "loss": 1.3905, "step": 14349 }, { "epoch": 0.5139040593048866, "grad_norm": 2.0680787563323975, "learning_rate": 0.00010035376915661076, "loss": 1.7688, "step": 14350 }, { "epoch": 0.5139398714344549, "grad_norm": 2.34431529045105, "learning_rate": 0.00010034217021390404, "loss": 1.3928, "step": 14351 }, { "epoch": 0.5139756835640231, "grad_norm": 1.8667163848876953, "learning_rate": 0.00010033057126659388, "loss": 1.6729, "step": 14352 }, { "epoch": 0.5140114956935914, "grad_norm": 1.555355429649353, "learning_rate": 0.0001003189723148363, "loss": 1.4538, "step": 14353 }, { "epoch": 0.5140473078231597, "grad_norm": 1.3511971235275269, "learning_rate": 0.00010030737335878735, "loss": 1.7526, "step": 14354 }, { "epoch": 0.514083119952728, "grad_norm": 1.7594633102416992, "learning_rate": 0.00010029577439860312, "loss": 1.3122, "step": 14355 }, { "epoch": 0.5141189320822963, "grad_norm": 1.5887043476104736, "learning_rate": 0.00010028417543443958, "loss": 1.536, "step": 14356 }, { "epoch": 0.5141547442118646, "grad_norm": 1.7294690608978271, "learning_rate": 0.00010027257646645285, "loss": 1.6294, "step": 14357 }, { "epoch": 0.5141905563414328, "grad_norm": 1.778171181678772, "learning_rate": 0.00010026097749479895, "loss": 1.3429, "step": 14358 }, { "epoch": 0.5142263684710011, "grad_norm": 2.0442750453948975, "learning_rate": 0.00010024937851963394, "loss": 1.1833, "step": 14359 }, { "epoch": 0.5142621806005694, "grad_norm": 1.6226485967636108, "learning_rate": 0.00010023777954111384, "loss": 1.1286, "step": 14360 }, { "epoch": 0.5142979927301377, "grad_norm": 1.7150418758392334, "learning_rate": 0.00010022618055939477, "loss": 1.7438, "step": 14361 }, { "epoch": 0.514333804859706, "grad_norm": 1.712880253791809, "learning_rate": 0.00010021458157463268, "loss": 1.5001, "step": 14362 }, { "epoch": 0.5143696169892743, "grad_norm": 2.2642722129821777, "learning_rate": 0.0001002029825869837, "loss": 1.6687, "step": 14363 }, { "epoch": 0.5144054291188426, "grad_norm": 1.4798762798309326, "learning_rate": 0.00010019138359660387, "loss": 1.6926, "step": 14364 }, { "epoch": 0.5144412412484108, "grad_norm": 2.6592345237731934, "learning_rate": 0.00010017978460364919, "loss": 1.6522, "step": 14365 }, { "epoch": 0.5144770533779791, "grad_norm": 1.899743914604187, "learning_rate": 0.00010016818560827577, "loss": 1.4547, "step": 14366 }, { "epoch": 0.5145128655075474, "grad_norm": 1.760870099067688, "learning_rate": 0.00010015658661063957, "loss": 1.5189, "step": 14367 }, { "epoch": 0.5145486776371156, "grad_norm": 1.7101985216140747, "learning_rate": 0.00010014498761089677, "loss": 1.691, "step": 14368 }, { "epoch": 0.514584489766684, "grad_norm": 1.8395957946777344, "learning_rate": 0.0001001333886092033, "loss": 1.4385, "step": 14369 }, { "epoch": 0.5146203018962523, "grad_norm": 1.5016250610351562, "learning_rate": 0.00010012178960571527, "loss": 1.7028, "step": 14370 }, { "epoch": 0.5146561140258206, "grad_norm": 1.3344529867172241, "learning_rate": 0.00010011019060058873, "loss": 1.5635, "step": 14371 }, { "epoch": 0.5146919261553888, "grad_norm": 1.4595482349395752, "learning_rate": 0.00010009859159397974, "loss": 1.5788, "step": 14372 }, { "epoch": 0.5147277382849571, "grad_norm": 1.6308788061141968, "learning_rate": 0.00010008699258604429, "loss": 1.2402, "step": 14373 }, { "epoch": 0.5147635504145254, "grad_norm": 1.6189227104187012, "learning_rate": 0.00010007539357693845, "loss": 1.4277, "step": 14374 }, { "epoch": 0.5147993625440936, "grad_norm": 1.546007513999939, "learning_rate": 0.00010006379456681834, "loss": 1.3149, "step": 14375 }, { "epoch": 0.514835174673662, "grad_norm": 1.400831699371338, "learning_rate": 0.00010005219555583991, "loss": 1.7555, "step": 14376 }, { "epoch": 0.5148709868032303, "grad_norm": 1.3352601528167725, "learning_rate": 0.00010004059654415927, "loss": 1.6648, "step": 14377 }, { "epoch": 0.5149067989327986, "grad_norm": 1.8999031782150269, "learning_rate": 0.00010002899753193246, "loss": 1.5502, "step": 14378 }, { "epoch": 0.5149426110623668, "grad_norm": 2.032066822052002, "learning_rate": 0.00010001739851931553, "loss": 1.6505, "step": 14379 }, { "epoch": 0.5149784231919351, "grad_norm": 1.6035372018814087, "learning_rate": 0.00010000579950646452, "loss": 1.7394, "step": 14380 }, { "epoch": 0.5150142353215034, "grad_norm": 1.6789970397949219, "learning_rate": 9.999420049353549e-05, "loss": 1.5343, "step": 14381 }, { "epoch": 0.5150500474510716, "grad_norm": 2.004620313644409, "learning_rate": 9.998260148068449e-05, "loss": 1.3025, "step": 14382 }, { "epoch": 0.51508585958064, "grad_norm": 2.159876823425293, "learning_rate": 9.997100246806755e-05, "loss": 1.7772, "step": 14383 }, { "epoch": 0.5151216717102083, "grad_norm": 1.748475193977356, "learning_rate": 9.995940345584074e-05, "loss": 1.7536, "step": 14384 }, { "epoch": 0.5151574838397766, "grad_norm": 1.7124178409576416, "learning_rate": 9.994780444416013e-05, "loss": 1.5581, "step": 14385 }, { "epoch": 0.5151932959693448, "grad_norm": 1.6359690427780151, "learning_rate": 9.99362054331817e-05, "loss": 1.7249, "step": 14386 }, { "epoch": 0.5152291080989131, "grad_norm": 1.629350185394287, "learning_rate": 9.992460642306156e-05, "loss": 1.2294, "step": 14387 }, { "epoch": 0.5152649202284814, "grad_norm": 1.9517797231674194, "learning_rate": 9.991300741395574e-05, "loss": 1.2735, "step": 14388 }, { "epoch": 0.5153007323580496, "grad_norm": 1.8916865587234497, "learning_rate": 9.99014084060203e-05, "loss": 1.5717, "step": 14389 }, { "epoch": 0.515336544487618, "grad_norm": 1.816402554512024, "learning_rate": 9.988980939941127e-05, "loss": 1.5327, "step": 14390 }, { "epoch": 0.5153723566171863, "grad_norm": 1.7927991151809692, "learning_rate": 9.987821039428474e-05, "loss": 1.1213, "step": 14391 }, { "epoch": 0.5154081687467545, "grad_norm": 1.3315367698669434, "learning_rate": 9.986661139079671e-05, "loss": 1.1219, "step": 14392 }, { "epoch": 0.5154439808763228, "grad_norm": 1.9263297319412231, "learning_rate": 9.985501238910325e-05, "loss": 1.585, "step": 14393 }, { "epoch": 0.5154797930058911, "grad_norm": 1.5922685861587524, "learning_rate": 9.984341338936043e-05, "loss": 1.5042, "step": 14394 }, { "epoch": 0.5155156051354594, "grad_norm": 2.0485384464263916, "learning_rate": 9.983181439172426e-05, "loss": 1.5463, "step": 14395 }, { "epoch": 0.5155514172650276, "grad_norm": 1.7884843349456787, "learning_rate": 9.982021539635084e-05, "loss": 1.5589, "step": 14396 }, { "epoch": 0.515587229394596, "grad_norm": 1.4187160730361938, "learning_rate": 9.980861640339614e-05, "loss": 1.1647, "step": 14397 }, { "epoch": 0.5156230415241643, "grad_norm": 1.627793312072754, "learning_rate": 9.979701741301631e-05, "loss": 1.2982, "step": 14398 }, { "epoch": 0.5156588536537325, "grad_norm": 1.3230700492858887, "learning_rate": 9.978541842536732e-05, "loss": 1.4045, "step": 14399 }, { "epoch": 0.5156946657833008, "grad_norm": 1.5387710332870483, "learning_rate": 9.977381944060525e-05, "loss": 1.4987, "step": 14400 }, { "epoch": 0.5157304779128691, "grad_norm": 1.577405333518982, "learning_rate": 9.976222045888614e-05, "loss": 1.5148, "step": 14401 }, { "epoch": 0.5157662900424373, "grad_norm": 1.404995322227478, "learning_rate": 9.975062148036608e-05, "loss": 1.537, "step": 14402 }, { "epoch": 0.5158021021720056, "grad_norm": 1.305178165435791, "learning_rate": 9.97390225052011e-05, "loss": 1.6459, "step": 14403 }, { "epoch": 0.515837914301574, "grad_norm": 2.065877676010132, "learning_rate": 9.972742353354717e-05, "loss": 1.6797, "step": 14404 }, { "epoch": 0.5158737264311423, "grad_norm": 1.7629402875900269, "learning_rate": 9.971582456556045e-05, "loss": 1.4101, "step": 14405 }, { "epoch": 0.5159095385607105, "grad_norm": 2.007244348526001, "learning_rate": 9.970422560139692e-05, "loss": 1.5678, "step": 14406 }, { "epoch": 0.5159453506902788, "grad_norm": 1.5562615394592285, "learning_rate": 9.969262664121267e-05, "loss": 1.6061, "step": 14407 }, { "epoch": 0.5159811628198471, "grad_norm": 1.781718134880066, "learning_rate": 9.968102768516371e-05, "loss": 1.6981, "step": 14408 }, { "epoch": 0.5160169749494153, "grad_norm": 1.2723973989486694, "learning_rate": 9.966942873340614e-05, "loss": 1.3363, "step": 14409 }, { "epoch": 0.5160527870789836, "grad_norm": 1.9770539999008179, "learning_rate": 9.965782978609595e-05, "loss": 1.2804, "step": 14410 }, { "epoch": 0.516088599208552, "grad_norm": 1.5593479871749878, "learning_rate": 9.964623084338926e-05, "loss": 1.5882, "step": 14411 }, { "epoch": 0.5161244113381203, "grad_norm": 1.7799978256225586, "learning_rate": 9.963463190544208e-05, "loss": 1.4457, "step": 14412 }, { "epoch": 0.5161602234676885, "grad_norm": 1.773402214050293, "learning_rate": 9.96230329724104e-05, "loss": 1.6663, "step": 14413 }, { "epoch": 0.5161960355972568, "grad_norm": 1.4051496982574463, "learning_rate": 9.961143404445038e-05, "loss": 1.439, "step": 14414 }, { "epoch": 0.5162318477268251, "grad_norm": 1.4480347633361816, "learning_rate": 9.959983512171796e-05, "loss": 1.4239, "step": 14415 }, { "epoch": 0.5162676598563933, "grad_norm": 1.744836449623108, "learning_rate": 9.95882362043693e-05, "loss": 1.5925, "step": 14416 }, { "epoch": 0.5163034719859616, "grad_norm": 2.2484891414642334, "learning_rate": 9.957663729256033e-05, "loss": 1.4217, "step": 14417 }, { "epoch": 0.51633928411553, "grad_norm": 1.41450834274292, "learning_rate": 9.956503838644719e-05, "loss": 1.4532, "step": 14418 }, { "epoch": 0.5163750962450983, "grad_norm": 1.5045841932296753, "learning_rate": 9.95534394861859e-05, "loss": 1.5254, "step": 14419 }, { "epoch": 0.5164109083746665, "grad_norm": 1.430303931236267, "learning_rate": 9.954184059193251e-05, "loss": 1.5091, "step": 14420 }, { "epoch": 0.5164467205042348, "grad_norm": 2.064718008041382, "learning_rate": 9.953024170384309e-05, "loss": 1.3218, "step": 14421 }, { "epoch": 0.5164825326338031, "grad_norm": 1.8702689409255981, "learning_rate": 9.95186428220736e-05, "loss": 1.3991, "step": 14422 }, { "epoch": 0.5165183447633713, "grad_norm": 1.897904634475708, "learning_rate": 9.950704394678021e-05, "loss": 1.3649, "step": 14423 }, { "epoch": 0.5165541568929396, "grad_norm": 1.8715026378631592, "learning_rate": 9.949544507811885e-05, "loss": 1.4766, "step": 14424 }, { "epoch": 0.516589969022508, "grad_norm": 1.6160260438919067, "learning_rate": 9.948384621624569e-05, "loss": 1.7052, "step": 14425 }, { "epoch": 0.5166257811520762, "grad_norm": 1.8741852045059204, "learning_rate": 9.947224736131662e-05, "loss": 1.4936, "step": 14426 }, { "epoch": 0.5166615932816445, "grad_norm": 2.4261343479156494, "learning_rate": 9.946064851348788e-05, "loss": 1.4065, "step": 14427 }, { "epoch": 0.5166974054112128, "grad_norm": 1.7130334377288818, "learning_rate": 9.944904967291533e-05, "loss": 1.1564, "step": 14428 }, { "epoch": 0.516733217540781, "grad_norm": 1.6435831785202026, "learning_rate": 9.94374508397552e-05, "loss": 1.2979, "step": 14429 }, { "epoch": 0.5167690296703493, "grad_norm": 1.4821659326553345, "learning_rate": 9.94258520141634e-05, "loss": 1.3349, "step": 14430 }, { "epoch": 0.5168048417999176, "grad_norm": 1.582342267036438, "learning_rate": 9.941425319629598e-05, "loss": 1.6496, "step": 14431 }, { "epoch": 0.516840653929486, "grad_norm": 1.3501367568969727, "learning_rate": 9.94026543863091e-05, "loss": 1.2989, "step": 14432 }, { "epoch": 0.5168764660590542, "grad_norm": 1.5915173292160034, "learning_rate": 9.939105558435866e-05, "loss": 1.7494, "step": 14433 }, { "epoch": 0.5169122781886225, "grad_norm": 1.774556040763855, "learning_rate": 9.937945679060085e-05, "loss": 1.3733, "step": 14434 }, { "epoch": 0.5169480903181908, "grad_norm": 2.2483110427856445, "learning_rate": 9.93678580051916e-05, "loss": 1.4633, "step": 14435 }, { "epoch": 0.516983902447759, "grad_norm": 1.5181875228881836, "learning_rate": 9.935625922828705e-05, "loss": 1.6502, "step": 14436 }, { "epoch": 0.5170197145773273, "grad_norm": 1.6699066162109375, "learning_rate": 9.934466046004313e-05, "loss": 1.4195, "step": 14437 }, { "epoch": 0.5170555267068956, "grad_norm": 1.4470813274383545, "learning_rate": 9.933306170061604e-05, "loss": 1.4196, "step": 14438 }, { "epoch": 0.517091338836464, "grad_norm": 1.7814797163009644, "learning_rate": 9.932146295016172e-05, "loss": 1.5409, "step": 14439 }, { "epoch": 0.5171271509660322, "grad_norm": 1.530346155166626, "learning_rate": 9.930986420883623e-05, "loss": 1.4033, "step": 14440 }, { "epoch": 0.5171629630956005, "grad_norm": 3.921464443206787, "learning_rate": 9.929826547679563e-05, "loss": 1.6177, "step": 14441 }, { "epoch": 0.5171987752251688, "grad_norm": 1.572906494140625, "learning_rate": 9.928666675419595e-05, "loss": 1.6209, "step": 14442 }, { "epoch": 0.517234587354737, "grad_norm": 2.0591559410095215, "learning_rate": 9.927506804119326e-05, "loss": 1.1671, "step": 14443 }, { "epoch": 0.5172703994843053, "grad_norm": 1.4399479627609253, "learning_rate": 9.926346933794357e-05, "loss": 1.4316, "step": 14444 }, { "epoch": 0.5173062116138736, "grad_norm": 1.3752440214157104, "learning_rate": 9.925187064460299e-05, "loss": 1.4705, "step": 14445 }, { "epoch": 0.517342023743442, "grad_norm": 1.7031986713409424, "learning_rate": 9.924027196132747e-05, "loss": 1.3291, "step": 14446 }, { "epoch": 0.5173778358730102, "grad_norm": 1.779016137123108, "learning_rate": 9.922867328827319e-05, "loss": 1.2053, "step": 14447 }, { "epoch": 0.5174136480025785, "grad_norm": 1.4940849542617798, "learning_rate": 9.921707462559608e-05, "loss": 1.6245, "step": 14448 }, { "epoch": 0.5174494601321468, "grad_norm": 1.6371047496795654, "learning_rate": 9.92054759734522e-05, "loss": 1.2594, "step": 14449 }, { "epoch": 0.517485272261715, "grad_norm": 2.4114179611206055, "learning_rate": 9.919387733199764e-05, "loss": 1.4802, "step": 14450 }, { "epoch": 0.5175210843912833, "grad_norm": 1.8427387475967407, "learning_rate": 9.91822787013884e-05, "loss": 1.7216, "step": 14451 }, { "epoch": 0.5175568965208516, "grad_norm": 1.5938351154327393, "learning_rate": 9.917068008178056e-05, "loss": 1.3973, "step": 14452 }, { "epoch": 0.51759270865042, "grad_norm": 1.515212059020996, "learning_rate": 9.915908147333013e-05, "loss": 1.3184, "step": 14453 }, { "epoch": 0.5176285207799882, "grad_norm": 2.3098485469818115, "learning_rate": 9.91474828761932e-05, "loss": 1.3732, "step": 14454 }, { "epoch": 0.5176643329095565, "grad_norm": 1.29752779006958, "learning_rate": 9.913588429052578e-05, "loss": 1.3128, "step": 14455 }, { "epoch": 0.5177001450391248, "grad_norm": 1.9097155332565308, "learning_rate": 9.912428571648393e-05, "loss": 1.5393, "step": 14456 }, { "epoch": 0.517735957168693, "grad_norm": 1.6922508478164673, "learning_rate": 9.91126871542237e-05, "loss": 1.4879, "step": 14457 }, { "epoch": 0.5177717692982613, "grad_norm": 2.132388114929199, "learning_rate": 9.910108860390107e-05, "loss": 1.5717, "step": 14458 }, { "epoch": 0.5178075814278296, "grad_norm": 2.9997427463531494, "learning_rate": 9.908949006567218e-05, "loss": 1.6868, "step": 14459 }, { "epoch": 0.5178433935573978, "grad_norm": 1.777268409729004, "learning_rate": 9.9077891539693e-05, "loss": 1.6998, "step": 14460 }, { "epoch": 0.5178792056869662, "grad_norm": 2.5405521392822266, "learning_rate": 9.906629302611961e-05, "loss": 1.8193, "step": 14461 }, { "epoch": 0.5179150178165345, "grad_norm": 2.0599591732025146, "learning_rate": 9.905469452510803e-05, "loss": 1.7768, "step": 14462 }, { "epoch": 0.5179508299461028, "grad_norm": 1.894958734512329, "learning_rate": 9.904309603681433e-05, "loss": 1.4591, "step": 14463 }, { "epoch": 0.517986642075671, "grad_norm": 1.3576775789260864, "learning_rate": 9.903149756139453e-05, "loss": 1.4085, "step": 14464 }, { "epoch": 0.5180224542052393, "grad_norm": 1.7294950485229492, "learning_rate": 9.901989909900469e-05, "loss": 1.7503, "step": 14465 }, { "epoch": 0.5180582663348076, "grad_norm": 1.5523263216018677, "learning_rate": 9.900830064980084e-05, "loss": 1.5677, "step": 14466 }, { "epoch": 0.5180940784643758, "grad_norm": 1.6701172590255737, "learning_rate": 9.899670221393901e-05, "loss": 1.7297, "step": 14467 }, { "epoch": 0.5181298905939442, "grad_norm": 1.5365049839019775, "learning_rate": 9.89851037915753e-05, "loss": 1.4214, "step": 14468 }, { "epoch": 0.5181657027235125, "grad_norm": 1.5968941450119019, "learning_rate": 9.897350538286566e-05, "loss": 1.4887, "step": 14469 }, { "epoch": 0.5182015148530807, "grad_norm": 2.073341131210327, "learning_rate": 9.896190698796621e-05, "loss": 1.4142, "step": 14470 }, { "epoch": 0.518237326982649, "grad_norm": 2.2669150829315186, "learning_rate": 9.895030860703295e-05, "loss": 1.6122, "step": 14471 }, { "epoch": 0.5182731391122173, "grad_norm": 1.5452347993850708, "learning_rate": 9.893871024022195e-05, "loss": 1.2547, "step": 14472 }, { "epoch": 0.5183089512417856, "grad_norm": 1.556300401687622, "learning_rate": 9.89271118876892e-05, "loss": 1.587, "step": 14473 }, { "epoch": 0.5183447633713538, "grad_norm": 1.6784199476242065, "learning_rate": 9.891551354959082e-05, "loss": 1.5504, "step": 14474 }, { "epoch": 0.5183805755009222, "grad_norm": 2.0387167930603027, "learning_rate": 9.890391522608278e-05, "loss": 1.5993, "step": 14475 }, { "epoch": 0.5184163876304905, "grad_norm": 1.639019250869751, "learning_rate": 9.889231691732115e-05, "loss": 1.5219, "step": 14476 }, { "epoch": 0.5184521997600587, "grad_norm": 1.59255051612854, "learning_rate": 9.888071862346198e-05, "loss": 1.5876, "step": 14477 }, { "epoch": 0.518488011889627, "grad_norm": 1.663064956665039, "learning_rate": 9.886912034466127e-05, "loss": 1.0076, "step": 14478 }, { "epoch": 0.5185238240191953, "grad_norm": 1.6870861053466797, "learning_rate": 9.885752208107511e-05, "loss": 1.5689, "step": 14479 }, { "epoch": 0.5185596361487635, "grad_norm": 1.2715725898742676, "learning_rate": 9.88459238328595e-05, "loss": 1.3663, "step": 14480 }, { "epoch": 0.5185954482783318, "grad_norm": 2.3270177841186523, "learning_rate": 9.883432560017052e-05, "loss": 1.6806, "step": 14481 }, { "epoch": 0.5186312604079002, "grad_norm": 2.031411647796631, "learning_rate": 9.882272738316418e-05, "loss": 1.6844, "step": 14482 }, { "epoch": 0.5186670725374685, "grad_norm": 1.4794574975967407, "learning_rate": 9.88111291819965e-05, "loss": 1.5449, "step": 14483 }, { "epoch": 0.5187028846670367, "grad_norm": 1.525395154953003, "learning_rate": 9.879953099682358e-05, "loss": 1.4823, "step": 14484 }, { "epoch": 0.518738696796605, "grad_norm": 2.1387875080108643, "learning_rate": 9.878793282780137e-05, "loss": 1.4335, "step": 14485 }, { "epoch": 0.5187745089261733, "grad_norm": 1.8972395658493042, "learning_rate": 9.877633467508602e-05, "loss": 1.3409, "step": 14486 }, { "epoch": 0.5188103210557415, "grad_norm": 1.8318805694580078, "learning_rate": 9.876473653883346e-05, "loss": 1.3479, "step": 14487 }, { "epoch": 0.5188461331853098, "grad_norm": 1.8570977449417114, "learning_rate": 9.87531384191998e-05, "loss": 1.4737, "step": 14488 }, { "epoch": 0.5188819453148782, "grad_norm": 1.4921190738677979, "learning_rate": 9.874154031634103e-05, "loss": 1.5501, "step": 14489 }, { "epoch": 0.5189177574444465, "grad_norm": 2.1454734802246094, "learning_rate": 9.872994223041325e-05, "loss": 1.7193, "step": 14490 }, { "epoch": 0.5189535695740147, "grad_norm": 1.6640002727508545, "learning_rate": 9.871834416157246e-05, "loss": 1.1278, "step": 14491 }, { "epoch": 0.518989381703583, "grad_norm": 1.7006652355194092, "learning_rate": 9.870674610997467e-05, "loss": 1.6311, "step": 14492 }, { "epoch": 0.5190251938331513, "grad_norm": 1.4623268842697144, "learning_rate": 9.869514807577595e-05, "loss": 1.0958, "step": 14493 }, { "epoch": 0.5190610059627195, "grad_norm": 1.8661326169967651, "learning_rate": 9.868355005913232e-05, "loss": 1.2537, "step": 14494 }, { "epoch": 0.5190968180922878, "grad_norm": 1.5580703020095825, "learning_rate": 9.867195206019985e-05, "loss": 1.6413, "step": 14495 }, { "epoch": 0.5191326302218562, "grad_norm": 2.2893712520599365, "learning_rate": 9.866035407913452e-05, "loss": 1.5394, "step": 14496 }, { "epoch": 0.5191684423514245, "grad_norm": 1.4904677867889404, "learning_rate": 9.864875611609243e-05, "loss": 1.5643, "step": 14497 }, { "epoch": 0.5192042544809927, "grad_norm": 1.5410066843032837, "learning_rate": 9.863715817122956e-05, "loss": 1.3726, "step": 14498 }, { "epoch": 0.519240066610561, "grad_norm": 1.7164530754089355, "learning_rate": 9.862556024470199e-05, "loss": 1.6264, "step": 14499 }, { "epoch": 0.5192758787401293, "grad_norm": 1.4902276992797852, "learning_rate": 9.861396233666574e-05, "loss": 1.5273, "step": 14500 }, { "epoch": 0.5193116908696975, "grad_norm": 1.731277346611023, "learning_rate": 9.860236444727679e-05, "loss": 1.5882, "step": 14501 }, { "epoch": 0.5193475029992658, "grad_norm": 1.6587519645690918, "learning_rate": 9.859076657669127e-05, "loss": 1.3716, "step": 14502 }, { "epoch": 0.5193833151288342, "grad_norm": 2.1876940727233887, "learning_rate": 9.857916872506513e-05, "loss": 1.5538, "step": 14503 }, { "epoch": 0.5194191272584024, "grad_norm": 1.7598111629486084, "learning_rate": 9.856757089255448e-05, "loss": 1.4646, "step": 14504 }, { "epoch": 0.5194549393879707, "grad_norm": 1.1007812023162842, "learning_rate": 9.85559730793153e-05, "loss": 1.2949, "step": 14505 }, { "epoch": 0.519490751517539, "grad_norm": 1.786514163017273, "learning_rate": 9.854437528550364e-05, "loss": 1.4212, "step": 14506 }, { "epoch": 0.5195265636471073, "grad_norm": 1.267168641090393, "learning_rate": 9.853277751127552e-05, "loss": 1.5191, "step": 14507 }, { "epoch": 0.5195623757766755, "grad_norm": 1.632846713066101, "learning_rate": 9.852117975678701e-05, "loss": 1.3769, "step": 14508 }, { "epoch": 0.5195981879062438, "grad_norm": 1.708034873008728, "learning_rate": 9.850958202219414e-05, "loss": 1.4926, "step": 14509 }, { "epoch": 0.5196340000358122, "grad_norm": 1.7593141794204712, "learning_rate": 9.849798430765286e-05, "loss": 1.3824, "step": 14510 }, { "epoch": 0.5196698121653804, "grad_norm": 2.2882802486419678, "learning_rate": 9.848638661331933e-05, "loss": 1.4505, "step": 14511 }, { "epoch": 0.5197056242949487, "grad_norm": 1.923661231994629, "learning_rate": 9.847478893934944e-05, "loss": 1.5545, "step": 14512 }, { "epoch": 0.519741436424517, "grad_norm": 2.511514186859131, "learning_rate": 9.846319128589936e-05, "loss": 1.2665, "step": 14513 }, { "epoch": 0.5197772485540852, "grad_norm": 1.4388993978500366, "learning_rate": 9.845159365312501e-05, "loss": 1.2587, "step": 14514 }, { "epoch": 0.5198130606836535, "grad_norm": 1.2576457262039185, "learning_rate": 9.84399960411825e-05, "loss": 1.1563, "step": 14515 }, { "epoch": 0.5198488728132218, "grad_norm": 2.3293657302856445, "learning_rate": 9.842839845022781e-05, "loss": 1.6719, "step": 14516 }, { "epoch": 0.5198846849427902, "grad_norm": 1.6276086568832397, "learning_rate": 9.841680088041701e-05, "loss": 1.5341, "step": 14517 }, { "epoch": 0.5199204970723584, "grad_norm": 1.4863203763961792, "learning_rate": 9.840520333190615e-05, "loss": 1.3341, "step": 14518 }, { "epoch": 0.5199563092019267, "grad_norm": 1.4681768417358398, "learning_rate": 9.839360580485115e-05, "loss": 1.2057, "step": 14519 }, { "epoch": 0.519992121331495, "grad_norm": 2.524519443511963, "learning_rate": 9.838200829940818e-05, "loss": 1.4854, "step": 14520 }, { "epoch": 0.5200279334610632, "grad_norm": 1.424342155456543, "learning_rate": 9.837041081573312e-05, "loss": 1.1943, "step": 14521 }, { "epoch": 0.5200637455906315, "grad_norm": 1.5036619901657104, "learning_rate": 9.835881335398216e-05, "loss": 1.5235, "step": 14522 }, { "epoch": 0.5200995577201998, "grad_norm": 1.508616328239441, "learning_rate": 9.834721591431118e-05, "loss": 1.6061, "step": 14523 }, { "epoch": 0.5201353698497682, "grad_norm": 1.989552617073059, "learning_rate": 9.833561849687634e-05, "loss": 1.6041, "step": 14524 }, { "epoch": 0.5201711819793364, "grad_norm": 1.4946794509887695, "learning_rate": 9.832402110183355e-05, "loss": 1.4669, "step": 14525 }, { "epoch": 0.5202069941089047, "grad_norm": 1.4868971109390259, "learning_rate": 9.831242372933896e-05, "loss": 1.4832, "step": 14526 }, { "epoch": 0.520242806238473, "grad_norm": 1.2756190299987793, "learning_rate": 9.830082637954851e-05, "loss": 1.2785, "step": 14527 }, { "epoch": 0.5202786183680412, "grad_norm": 1.9612704515457153, "learning_rate": 9.828922905261819e-05, "loss": 1.355, "step": 14528 }, { "epoch": 0.5203144304976095, "grad_norm": 2.44775652885437, "learning_rate": 9.827763174870417e-05, "loss": 1.4828, "step": 14529 }, { "epoch": 0.5203502426271778, "grad_norm": 1.8801780939102173, "learning_rate": 9.826603446796231e-05, "loss": 1.1552, "step": 14530 }, { "epoch": 0.5203860547567462, "grad_norm": 2.448315143585205, "learning_rate": 9.82544372105488e-05, "loss": 1.5563, "step": 14531 }, { "epoch": 0.5204218668863144, "grad_norm": 2.337183713912964, "learning_rate": 9.824283997661952e-05, "loss": 1.6272, "step": 14532 }, { "epoch": 0.5204576790158827, "grad_norm": 1.5295284986495972, "learning_rate": 9.823124276633061e-05, "loss": 1.7039, "step": 14533 }, { "epoch": 0.520493491145451, "grad_norm": 2.0706472396850586, "learning_rate": 9.821964557983799e-05, "loss": 1.4274, "step": 14534 }, { "epoch": 0.5205293032750192, "grad_norm": 1.6976041793823242, "learning_rate": 9.820804841729782e-05, "loss": 1.3637, "step": 14535 }, { "epoch": 0.5205651154045875, "grad_norm": 1.4130737781524658, "learning_rate": 9.819645127886602e-05, "loss": 1.3593, "step": 14536 }, { "epoch": 0.5206009275341558, "grad_norm": 1.3637433052062988, "learning_rate": 9.818485416469861e-05, "loss": 1.4258, "step": 14537 }, { "epoch": 0.5206367396637241, "grad_norm": 1.7225490808486938, "learning_rate": 9.817325707495167e-05, "loss": 1.5675, "step": 14538 }, { "epoch": 0.5206725517932924, "grad_norm": 2.0762996673583984, "learning_rate": 9.816166000978119e-05, "loss": 1.2102, "step": 14539 }, { "epoch": 0.5207083639228607, "grad_norm": 1.6375013589859009, "learning_rate": 9.815006296934321e-05, "loss": 1.5834, "step": 14540 }, { "epoch": 0.520744176052429, "grad_norm": 1.525462031364441, "learning_rate": 9.813846595379371e-05, "loss": 1.6418, "step": 14541 }, { "epoch": 0.5207799881819972, "grad_norm": 1.9003756046295166, "learning_rate": 9.812686896328882e-05, "loss": 1.9051, "step": 14542 }, { "epoch": 0.5208158003115655, "grad_norm": 1.4063869714736938, "learning_rate": 9.811527199798443e-05, "loss": 1.2691, "step": 14543 }, { "epoch": 0.5208516124411338, "grad_norm": 2.0352799892425537, "learning_rate": 9.810367505803667e-05, "loss": 1.5649, "step": 14544 }, { "epoch": 0.5208874245707021, "grad_norm": 2.369400978088379, "learning_rate": 9.80920781436015e-05, "loss": 1.5076, "step": 14545 }, { "epoch": 0.5209232367002704, "grad_norm": 1.6463871002197266, "learning_rate": 9.808048125483494e-05, "loss": 1.2952, "step": 14546 }, { "epoch": 0.5209590488298387, "grad_norm": 1.4209014177322388, "learning_rate": 9.806888439189306e-05, "loss": 1.6418, "step": 14547 }, { "epoch": 0.520994860959407, "grad_norm": 1.5328431129455566, "learning_rate": 9.805728755493182e-05, "loss": 1.1911, "step": 14548 }, { "epoch": 0.5210306730889752, "grad_norm": 1.4673058986663818, "learning_rate": 9.804569074410729e-05, "loss": 1.3006, "step": 14549 }, { "epoch": 0.5210664852185435, "grad_norm": 1.8996427059173584, "learning_rate": 9.803409395957545e-05, "loss": 1.54, "step": 14550 }, { "epoch": 0.5211022973481118, "grad_norm": 1.5183322429656982, "learning_rate": 9.802249720149236e-05, "loss": 1.444, "step": 14551 }, { "epoch": 0.5211381094776801, "grad_norm": 2.154158353805542, "learning_rate": 9.8010900470014e-05, "loss": 1.3751, "step": 14552 }, { "epoch": 0.5211739216072484, "grad_norm": 1.3777554035186768, "learning_rate": 9.799930376529643e-05, "loss": 1.5448, "step": 14553 }, { "epoch": 0.5212097337368167, "grad_norm": 1.716951847076416, "learning_rate": 9.798770708749563e-05, "loss": 1.4537, "step": 14554 }, { "epoch": 0.5212455458663849, "grad_norm": 1.4311693906784058, "learning_rate": 9.797611043676764e-05, "loss": 1.6834, "step": 14555 }, { "epoch": 0.5212813579959532, "grad_norm": 1.699777603149414, "learning_rate": 9.796451381326849e-05, "loss": 1.7289, "step": 14556 }, { "epoch": 0.5213171701255215, "grad_norm": 1.704720139503479, "learning_rate": 9.795291721715414e-05, "loss": 1.7788, "step": 14557 }, { "epoch": 0.5213529822550897, "grad_norm": 1.8193825483322144, "learning_rate": 9.794132064858069e-05, "loss": 1.7059, "step": 14558 }, { "epoch": 0.5213887943846581, "grad_norm": 1.9302318096160889, "learning_rate": 9.792972410770409e-05, "loss": 1.6084, "step": 14559 }, { "epoch": 0.5214246065142264, "grad_norm": 1.3157739639282227, "learning_rate": 9.791812759468039e-05, "loss": 1.2628, "step": 14560 }, { "epoch": 0.5214604186437947, "grad_norm": 1.3691009283065796, "learning_rate": 9.79065311096656e-05, "loss": 1.2926, "step": 14561 }, { "epoch": 0.5214962307733629, "grad_norm": 1.2893990278244019, "learning_rate": 9.789493465281574e-05, "loss": 1.6619, "step": 14562 }, { "epoch": 0.5215320429029312, "grad_norm": 1.7624584436416626, "learning_rate": 9.788333822428682e-05, "loss": 1.5958, "step": 14563 }, { "epoch": 0.5215678550324995, "grad_norm": 1.5968478918075562, "learning_rate": 9.787174182423484e-05, "loss": 1.5239, "step": 14564 }, { "epoch": 0.5216036671620677, "grad_norm": 2.377908945083618, "learning_rate": 9.786014545281585e-05, "loss": 1.5145, "step": 14565 }, { "epoch": 0.5216394792916361, "grad_norm": 1.5355181694030762, "learning_rate": 9.78485491101858e-05, "loss": 1.5049, "step": 14566 }, { "epoch": 0.5216752914212044, "grad_norm": 1.638087272644043, "learning_rate": 9.783695279650079e-05, "loss": 1.3355, "step": 14567 }, { "epoch": 0.5217111035507727, "grad_norm": 2.3670177459716797, "learning_rate": 9.782535651191676e-05, "loss": 1.2759, "step": 14568 }, { "epoch": 0.5217469156803409, "grad_norm": 2.8764686584472656, "learning_rate": 9.781376025658977e-05, "loss": 1.5412, "step": 14569 }, { "epoch": 0.5217827278099092, "grad_norm": 1.5233416557312012, "learning_rate": 9.78021640306758e-05, "loss": 1.1315, "step": 14570 }, { "epoch": 0.5218185399394775, "grad_norm": 1.8155995607376099, "learning_rate": 9.77905678343309e-05, "loss": 1.769, "step": 14571 }, { "epoch": 0.5218543520690457, "grad_norm": 1.6024277210235596, "learning_rate": 9.777897166771107e-05, "loss": 1.2893, "step": 14572 }, { "epoch": 0.5218901641986141, "grad_norm": 1.7057386636734009, "learning_rate": 9.776737553097227e-05, "loss": 1.6424, "step": 14573 }, { "epoch": 0.5219259763281824, "grad_norm": 2.125110149383545, "learning_rate": 9.775577942427058e-05, "loss": 1.3813, "step": 14574 }, { "epoch": 0.5219617884577507, "grad_norm": 1.6502152681350708, "learning_rate": 9.774418334776196e-05, "loss": 1.4526, "step": 14575 }, { "epoch": 0.5219976005873189, "grad_norm": 1.7954357862472534, "learning_rate": 9.773258730160247e-05, "loss": 1.5242, "step": 14576 }, { "epoch": 0.5220334127168872, "grad_norm": 1.6591635942459106, "learning_rate": 9.772099128594808e-05, "loss": 1.3148, "step": 14577 }, { "epoch": 0.5220692248464555, "grad_norm": 1.6281665563583374, "learning_rate": 9.770939530095482e-05, "loss": 1.6924, "step": 14578 }, { "epoch": 0.5221050369760237, "grad_norm": 1.7232989072799683, "learning_rate": 9.769779934677869e-05, "loss": 1.7348, "step": 14579 }, { "epoch": 0.5221408491055921, "grad_norm": 2.068788766860962, "learning_rate": 9.76862034235757e-05, "loss": 1.4131, "step": 14580 }, { "epoch": 0.5221766612351604, "grad_norm": 2.2537050247192383, "learning_rate": 9.767460753150186e-05, "loss": 1.3277, "step": 14581 }, { "epoch": 0.5222124733647286, "grad_norm": 2.0896992683410645, "learning_rate": 9.766301167071316e-05, "loss": 1.5849, "step": 14582 }, { "epoch": 0.5222482854942969, "grad_norm": 1.4738152027130127, "learning_rate": 9.765141584136565e-05, "loss": 1.4946, "step": 14583 }, { "epoch": 0.5222840976238652, "grad_norm": 1.8603551387786865, "learning_rate": 9.763982004361527e-05, "loss": 1.6751, "step": 14584 }, { "epoch": 0.5223199097534335, "grad_norm": 1.5763189792633057, "learning_rate": 9.76282242776181e-05, "loss": 1.2796, "step": 14585 }, { "epoch": 0.5223557218830017, "grad_norm": 1.772830843925476, "learning_rate": 9.76166285435301e-05, "loss": 1.6905, "step": 14586 }, { "epoch": 0.5223915340125701, "grad_norm": 1.5136207342147827, "learning_rate": 9.76050328415073e-05, "loss": 1.3872, "step": 14587 }, { "epoch": 0.5224273461421384, "grad_norm": 1.3309680223464966, "learning_rate": 9.759343717170571e-05, "loss": 1.2488, "step": 14588 }, { "epoch": 0.5224631582717066, "grad_norm": 2.110062837600708, "learning_rate": 9.758184153428126e-05, "loss": 1.5052, "step": 14589 }, { "epoch": 0.5224989704012749, "grad_norm": 1.3645905256271362, "learning_rate": 9.757024592939008e-05, "loss": 1.4249, "step": 14590 }, { "epoch": 0.5225347825308432, "grad_norm": 1.6391838788986206, "learning_rate": 9.755865035718807e-05, "loss": 1.5082, "step": 14591 }, { "epoch": 0.5225705946604114, "grad_norm": 1.9833340644836426, "learning_rate": 9.754705481783127e-05, "loss": 1.6181, "step": 14592 }, { "epoch": 0.5226064067899797, "grad_norm": 1.6536145210266113, "learning_rate": 9.753545931147569e-05, "loss": 1.7407, "step": 14593 }, { "epoch": 0.5226422189195481, "grad_norm": 1.3998302221298218, "learning_rate": 9.752386383827733e-05, "loss": 1.5109, "step": 14594 }, { "epoch": 0.5226780310491164, "grad_norm": 2.0293946266174316, "learning_rate": 9.751226839839217e-05, "loss": 1.5249, "step": 14595 }, { "epoch": 0.5227138431786846, "grad_norm": 1.617629885673523, "learning_rate": 9.750067299197625e-05, "loss": 1.64, "step": 14596 }, { "epoch": 0.5227496553082529, "grad_norm": 1.4700734615325928, "learning_rate": 9.748907761918558e-05, "loss": 1.4105, "step": 14597 }, { "epoch": 0.5227854674378212, "grad_norm": 2.149026870727539, "learning_rate": 9.747748228017606e-05, "loss": 1.4068, "step": 14598 }, { "epoch": 0.5228212795673894, "grad_norm": 1.604383111000061, "learning_rate": 9.746588697510381e-05, "loss": 1.4969, "step": 14599 }, { "epoch": 0.5228570916969577, "grad_norm": 2.5129008293151855, "learning_rate": 9.745429170412476e-05, "loss": 1.5898, "step": 14600 }, { "epoch": 0.5228929038265261, "grad_norm": 1.2759195566177368, "learning_rate": 9.744269646739494e-05, "loss": 1.2335, "step": 14601 }, { "epoch": 0.5229287159560944, "grad_norm": 1.4611231088638306, "learning_rate": 9.743110126507034e-05, "loss": 1.2164, "step": 14602 }, { "epoch": 0.5229645280856626, "grad_norm": 1.63002610206604, "learning_rate": 9.741950609730696e-05, "loss": 1.5584, "step": 14603 }, { "epoch": 0.5230003402152309, "grad_norm": 1.3063840866088867, "learning_rate": 9.74079109642608e-05, "loss": 1.0043, "step": 14604 }, { "epoch": 0.5230361523447992, "grad_norm": 1.531385064125061, "learning_rate": 9.739631586608786e-05, "loss": 1.3484, "step": 14605 }, { "epoch": 0.5230719644743674, "grad_norm": 1.7938756942749023, "learning_rate": 9.738472080294415e-05, "loss": 1.5975, "step": 14606 }, { "epoch": 0.5231077766039357, "grad_norm": 1.6742467880249023, "learning_rate": 9.737312577498559e-05, "loss": 1.5811, "step": 14607 }, { "epoch": 0.5231435887335041, "grad_norm": 1.9026448726654053, "learning_rate": 9.73615307823683e-05, "loss": 1.3136, "step": 14608 }, { "epoch": 0.5231794008630724, "grad_norm": 1.5856115818023682, "learning_rate": 9.734993582524814e-05, "loss": 1.4487, "step": 14609 }, { "epoch": 0.5232152129926406, "grad_norm": 1.6417968273162842, "learning_rate": 9.733834090378125e-05, "loss": 1.3294, "step": 14610 }, { "epoch": 0.5232510251222089, "grad_norm": 1.5064736604690552, "learning_rate": 9.732674601812347e-05, "loss": 1.7278, "step": 14611 }, { "epoch": 0.5232868372517772, "grad_norm": 1.5335125923156738, "learning_rate": 9.731515116843094e-05, "loss": 1.5331, "step": 14612 }, { "epoch": 0.5233226493813454, "grad_norm": 1.4457260370254517, "learning_rate": 9.730355635485953e-05, "loss": 1.3542, "step": 14613 }, { "epoch": 0.5233584615109137, "grad_norm": 1.6329476833343506, "learning_rate": 9.729196157756534e-05, "loss": 1.429, "step": 14614 }, { "epoch": 0.5233942736404821, "grad_norm": 2.010593891143799, "learning_rate": 9.728036683670433e-05, "loss": 1.5153, "step": 14615 }, { "epoch": 0.5234300857700503, "grad_norm": 1.8740265369415283, "learning_rate": 9.72687721324324e-05, "loss": 1.582, "step": 14616 }, { "epoch": 0.5234658978996186, "grad_norm": 2.2544100284576416, "learning_rate": 9.725717746490571e-05, "loss": 1.6876, "step": 14617 }, { "epoch": 0.5235017100291869, "grad_norm": 1.692060112953186, "learning_rate": 9.724558283428007e-05, "loss": 1.5436, "step": 14618 }, { "epoch": 0.5235375221587552, "grad_norm": 1.4941781759262085, "learning_rate": 9.723398824071164e-05, "loss": 1.4658, "step": 14619 }, { "epoch": 0.5235733342883234, "grad_norm": 1.7871164083480835, "learning_rate": 9.722239368435624e-05, "loss": 1.5842, "step": 14620 }, { "epoch": 0.5236091464178917, "grad_norm": 2.2636523246765137, "learning_rate": 9.721079916537004e-05, "loss": 1.709, "step": 14621 }, { "epoch": 0.5236449585474601, "grad_norm": 1.7238781452178955, "learning_rate": 9.719920468390888e-05, "loss": 1.6237, "step": 14622 }, { "epoch": 0.5236807706770283, "grad_norm": 2.144887924194336, "learning_rate": 9.718761024012886e-05, "loss": 1.3904, "step": 14623 }, { "epoch": 0.5237165828065966, "grad_norm": 1.6877436637878418, "learning_rate": 9.717601583418588e-05, "loss": 1.5621, "step": 14624 }, { "epoch": 0.5237523949361649, "grad_norm": 1.2671278715133667, "learning_rate": 9.716442146623594e-05, "loss": 1.2752, "step": 14625 }, { "epoch": 0.5237882070657331, "grad_norm": 2.0848145484924316, "learning_rate": 9.715282713643512e-05, "loss": 1.378, "step": 14626 }, { "epoch": 0.5238240191953014, "grad_norm": 2.0249481201171875, "learning_rate": 9.714123284493925e-05, "loss": 1.227, "step": 14627 }, { "epoch": 0.5238598313248697, "grad_norm": 1.9421577453613281, "learning_rate": 9.712963859190449e-05, "loss": 1.6061, "step": 14628 }, { "epoch": 0.5238956434544381, "grad_norm": 1.442251443862915, "learning_rate": 9.711804437748669e-05, "loss": 1.4937, "step": 14629 }, { "epoch": 0.5239314555840063, "grad_norm": 1.8193825483322144, "learning_rate": 9.710645020184193e-05, "loss": 1.5649, "step": 14630 }, { "epoch": 0.5239672677135746, "grad_norm": 1.8133513927459717, "learning_rate": 9.709485606512607e-05, "loss": 1.6271, "step": 14631 }, { "epoch": 0.5240030798431429, "grad_norm": 1.5228266716003418, "learning_rate": 9.708326196749527e-05, "loss": 1.9068, "step": 14632 }, { "epoch": 0.5240388919727111, "grad_norm": 2.925337314605713, "learning_rate": 9.707166790910538e-05, "loss": 1.6324, "step": 14633 }, { "epoch": 0.5240747041022794, "grad_norm": 2.007188081741333, "learning_rate": 9.70600738901124e-05, "loss": 1.4698, "step": 14634 }, { "epoch": 0.5241105162318477, "grad_norm": 1.4450551271438599, "learning_rate": 9.704847991067236e-05, "loss": 1.4539, "step": 14635 }, { "epoch": 0.5241463283614161, "grad_norm": 1.247611165046692, "learning_rate": 9.703688597094118e-05, "loss": 1.394, "step": 14636 }, { "epoch": 0.5241821404909843, "grad_norm": 1.4059821367263794, "learning_rate": 9.702529207107491e-05, "loss": 1.4542, "step": 14637 }, { "epoch": 0.5242179526205526, "grad_norm": 1.3357560634613037, "learning_rate": 9.701369821122945e-05, "loss": 1.4459, "step": 14638 }, { "epoch": 0.5242537647501209, "grad_norm": 1.6569325923919678, "learning_rate": 9.70021043915609e-05, "loss": 1.3154, "step": 14639 }, { "epoch": 0.5242895768796891, "grad_norm": 1.5145827531814575, "learning_rate": 9.69905106122251e-05, "loss": 1.4301, "step": 14640 }, { "epoch": 0.5243253890092574, "grad_norm": 1.7709879875183105, "learning_rate": 9.697891687337817e-05, "loss": 1.4174, "step": 14641 }, { "epoch": 0.5243612011388257, "grad_norm": 1.6160484552383423, "learning_rate": 9.696732317517599e-05, "loss": 1.5869, "step": 14642 }, { "epoch": 0.524397013268394, "grad_norm": 1.8500139713287354, "learning_rate": 9.695572951777454e-05, "loss": 1.5376, "step": 14643 }, { "epoch": 0.5244328253979623, "grad_norm": 2.073073148727417, "learning_rate": 9.694413590132985e-05, "loss": 1.3357, "step": 14644 }, { "epoch": 0.5244686375275306, "grad_norm": 1.8784855604171753, "learning_rate": 9.693254232599784e-05, "loss": 1.4267, "step": 14645 }, { "epoch": 0.5245044496570989, "grad_norm": 1.3910045623779297, "learning_rate": 9.692094879193455e-05, "loss": 1.5124, "step": 14646 }, { "epoch": 0.5245402617866671, "grad_norm": 2.0571978092193604, "learning_rate": 9.690935529929587e-05, "loss": 1.9757, "step": 14647 }, { "epoch": 0.5245760739162354, "grad_norm": 1.7781730890274048, "learning_rate": 9.689776184823789e-05, "loss": 1.3493, "step": 14648 }, { "epoch": 0.5246118860458037, "grad_norm": 1.7598072290420532, "learning_rate": 9.688616843891648e-05, "loss": 1.1725, "step": 14649 }, { "epoch": 0.524647698175372, "grad_norm": 2.3508193492889404, "learning_rate": 9.687457507148768e-05, "loss": 1.685, "step": 14650 }, { "epoch": 0.5246835103049403, "grad_norm": 2.0934391021728516, "learning_rate": 9.686298174610745e-05, "loss": 1.3282, "step": 14651 }, { "epoch": 0.5247193224345086, "grad_norm": 1.3602052927017212, "learning_rate": 9.685138846293171e-05, "loss": 1.3148, "step": 14652 }, { "epoch": 0.5247551345640769, "grad_norm": 1.5019041299819946, "learning_rate": 9.683979522211652e-05, "loss": 1.4427, "step": 14653 }, { "epoch": 0.5247909466936451, "grad_norm": 1.4769161939620972, "learning_rate": 9.682820202381779e-05, "loss": 1.4798, "step": 14654 }, { "epoch": 0.5248267588232134, "grad_norm": 1.3109664916992188, "learning_rate": 9.681660886819152e-05, "loss": 1.3262, "step": 14655 }, { "epoch": 0.5248625709527817, "grad_norm": 1.9010754823684692, "learning_rate": 9.680501575539365e-05, "loss": 1.4391, "step": 14656 }, { "epoch": 0.52489838308235, "grad_norm": 1.923020839691162, "learning_rate": 9.679342268558019e-05, "loss": 1.4267, "step": 14657 }, { "epoch": 0.5249341952119183, "grad_norm": 1.572023630142212, "learning_rate": 9.678182965890708e-05, "loss": 1.4757, "step": 14658 }, { "epoch": 0.5249700073414866, "grad_norm": 2.1957943439483643, "learning_rate": 9.677023667553033e-05, "loss": 1.4175, "step": 14659 }, { "epoch": 0.5250058194710548, "grad_norm": 1.832504153251648, "learning_rate": 9.675864373560586e-05, "loss": 1.2453, "step": 14660 }, { "epoch": 0.5250416316006231, "grad_norm": 2.1098172664642334, "learning_rate": 9.674705083928965e-05, "loss": 1.6742, "step": 14661 }, { "epoch": 0.5250774437301914, "grad_norm": 1.6381540298461914, "learning_rate": 9.673545798673769e-05, "loss": 1.4837, "step": 14662 }, { "epoch": 0.5251132558597597, "grad_norm": 1.4275070428848267, "learning_rate": 9.67238651781059e-05, "loss": 1.3579, "step": 14663 }, { "epoch": 0.525149067989328, "grad_norm": 1.609323501586914, "learning_rate": 9.671227241355031e-05, "loss": 1.3936, "step": 14664 }, { "epoch": 0.5251848801188963, "grad_norm": 1.49817955493927, "learning_rate": 9.670067969322684e-05, "loss": 1.5228, "step": 14665 }, { "epoch": 0.5252206922484646, "grad_norm": 1.4510881900787354, "learning_rate": 9.668908701729148e-05, "loss": 1.3275, "step": 14666 }, { "epoch": 0.5252565043780328, "grad_norm": 1.4491652250289917, "learning_rate": 9.667749438590017e-05, "loss": 1.4348, "step": 14667 }, { "epoch": 0.5252923165076011, "grad_norm": 1.9161545038223267, "learning_rate": 9.66659017992089e-05, "loss": 1.5006, "step": 14668 }, { "epoch": 0.5253281286371694, "grad_norm": 1.586111307144165, "learning_rate": 9.665430925737362e-05, "loss": 1.5004, "step": 14669 }, { "epoch": 0.5253639407667376, "grad_norm": 1.7797801494598389, "learning_rate": 9.664271676055027e-05, "loss": 1.3378, "step": 14670 }, { "epoch": 0.525399752896306, "grad_norm": 1.4467164278030396, "learning_rate": 9.663112430889487e-05, "loss": 1.249, "step": 14671 }, { "epoch": 0.5254355650258743, "grad_norm": 1.5761590003967285, "learning_rate": 9.661953190256333e-05, "loss": 1.6898, "step": 14672 }, { "epoch": 0.5254713771554426, "grad_norm": 1.3600083589553833, "learning_rate": 9.660793954171163e-05, "loss": 1.6802, "step": 14673 }, { "epoch": 0.5255071892850108, "grad_norm": 1.4301543235778809, "learning_rate": 9.65963472264957e-05, "loss": 1.5224, "step": 14674 }, { "epoch": 0.5255430014145791, "grad_norm": 1.6215018033981323, "learning_rate": 9.658475495707157e-05, "loss": 1.6439, "step": 14675 }, { "epoch": 0.5255788135441474, "grad_norm": 1.8243167400360107, "learning_rate": 9.657316273359515e-05, "loss": 1.2978, "step": 14676 }, { "epoch": 0.5256146256737156, "grad_norm": 1.8288040161132812, "learning_rate": 9.65615705562224e-05, "loss": 1.7493, "step": 14677 }, { "epoch": 0.525650437803284, "grad_norm": 1.4830673933029175, "learning_rate": 9.654997842510928e-05, "loss": 1.687, "step": 14678 }, { "epoch": 0.5256862499328523, "grad_norm": 1.6650246381759644, "learning_rate": 9.653838634041173e-05, "loss": 1.5981, "step": 14679 }, { "epoch": 0.5257220620624206, "grad_norm": 1.6408103704452515, "learning_rate": 9.652679430228576e-05, "loss": 1.7101, "step": 14680 }, { "epoch": 0.5257578741919888, "grad_norm": 1.654687523841858, "learning_rate": 9.651520231088726e-05, "loss": 1.4703, "step": 14681 }, { "epoch": 0.5257936863215571, "grad_norm": 1.589911699295044, "learning_rate": 9.650361036637225e-05, "loss": 1.5057, "step": 14682 }, { "epoch": 0.5258294984511254, "grad_norm": 1.8815149068832397, "learning_rate": 9.649201846889663e-05, "loss": 1.6379, "step": 14683 }, { "epoch": 0.5258653105806936, "grad_norm": 1.7435766458511353, "learning_rate": 9.64804266186164e-05, "loss": 1.4799, "step": 14684 }, { "epoch": 0.525901122710262, "grad_norm": 1.667809247970581, "learning_rate": 9.646883481568748e-05, "loss": 1.5879, "step": 14685 }, { "epoch": 0.5259369348398303, "grad_norm": 1.662645936012268, "learning_rate": 9.645724306026582e-05, "loss": 1.5564, "step": 14686 }, { "epoch": 0.5259727469693986, "grad_norm": 1.785279631614685, "learning_rate": 9.644565135250739e-05, "loss": 1.2906, "step": 14687 }, { "epoch": 0.5260085590989668, "grad_norm": 2.513857364654541, "learning_rate": 9.643405969256814e-05, "loss": 1.3403, "step": 14688 }, { "epoch": 0.5260443712285351, "grad_norm": 2.5112111568450928, "learning_rate": 9.642246808060401e-05, "loss": 1.6178, "step": 14689 }, { "epoch": 0.5260801833581034, "grad_norm": 1.8668169975280762, "learning_rate": 9.641087651677096e-05, "loss": 1.6152, "step": 14690 }, { "epoch": 0.5261159954876716, "grad_norm": 1.6442675590515137, "learning_rate": 9.639928500122495e-05, "loss": 1.8965, "step": 14691 }, { "epoch": 0.52615180761724, "grad_norm": 1.996891975402832, "learning_rate": 9.638769353412189e-05, "loss": 1.8066, "step": 14692 }, { "epoch": 0.5261876197468083, "grad_norm": 1.669157862663269, "learning_rate": 9.637610211561779e-05, "loss": 1.584, "step": 14693 }, { "epoch": 0.5262234318763765, "grad_norm": 1.7696603536605835, "learning_rate": 9.636451074586856e-05, "loss": 1.5034, "step": 14694 }, { "epoch": 0.5262592440059448, "grad_norm": 1.5234453678131104, "learning_rate": 9.63529194250301e-05, "loss": 1.4586, "step": 14695 }, { "epoch": 0.5262950561355131, "grad_norm": 1.9366183280944824, "learning_rate": 9.634132815325844e-05, "loss": 1.4498, "step": 14696 }, { "epoch": 0.5263308682650814, "grad_norm": 1.821183443069458, "learning_rate": 9.632973693070947e-05, "loss": 1.1892, "step": 14697 }, { "epoch": 0.5263666803946496, "grad_norm": 1.601720929145813, "learning_rate": 9.631814575753918e-05, "loss": 1.59, "step": 14698 }, { "epoch": 0.526402492524218, "grad_norm": 1.829098105430603, "learning_rate": 9.630655463390347e-05, "loss": 1.2141, "step": 14699 }, { "epoch": 0.5264383046537863, "grad_norm": 1.2035573720932007, "learning_rate": 9.629496355995831e-05, "loss": 1.3387, "step": 14700 }, { "epoch": 0.5264741167833545, "grad_norm": 1.415198802947998, "learning_rate": 9.628337253585964e-05, "loss": 1.3656, "step": 14701 }, { "epoch": 0.5265099289129228, "grad_norm": 1.8165593147277832, "learning_rate": 9.62717815617634e-05, "loss": 1.5449, "step": 14702 }, { "epoch": 0.5265457410424911, "grad_norm": 1.64618718624115, "learning_rate": 9.626019063782557e-05, "loss": 1.4761, "step": 14703 }, { "epoch": 0.5265815531720593, "grad_norm": 1.2743486166000366, "learning_rate": 9.624859976420196e-05, "loss": 1.4644, "step": 14704 }, { "epoch": 0.5266173653016276, "grad_norm": 1.5379201173782349, "learning_rate": 9.623700894104869e-05, "loss": 1.2605, "step": 14705 }, { "epoch": 0.526653177431196, "grad_norm": 1.9712167978286743, "learning_rate": 9.622541816852153e-05, "loss": 1.3814, "step": 14706 }, { "epoch": 0.5266889895607643, "grad_norm": 2.3892998695373535, "learning_rate": 9.621382744677658e-05, "loss": 1.8143, "step": 14707 }, { "epoch": 0.5267248016903325, "grad_norm": 1.2501753568649292, "learning_rate": 9.620223677596962e-05, "loss": 1.4968, "step": 14708 }, { "epoch": 0.5267606138199008, "grad_norm": 1.9475300312042236, "learning_rate": 9.619064615625671e-05, "loss": 1.5235, "step": 14709 }, { "epoch": 0.5267964259494691, "grad_norm": 2.2302019596099854, "learning_rate": 9.617905558779373e-05, "loss": 1.6564, "step": 14710 }, { "epoch": 0.5268322380790373, "grad_norm": 1.5667731761932373, "learning_rate": 9.616746507073664e-05, "loss": 1.4212, "step": 14711 }, { "epoch": 0.5268680502086056, "grad_norm": 2.0810258388519287, "learning_rate": 9.61558746052414e-05, "loss": 1.8592, "step": 14712 }, { "epoch": 0.526903862338174, "grad_norm": 1.1688623428344727, "learning_rate": 9.614428419146381e-05, "loss": 1.5327, "step": 14713 }, { "epoch": 0.5269396744677423, "grad_norm": 2.2579002380371094, "learning_rate": 9.613269382956e-05, "loss": 1.5626, "step": 14714 }, { "epoch": 0.5269754865973105, "grad_norm": 1.4683496952056885, "learning_rate": 9.612110351968573e-05, "loss": 1.3157, "step": 14715 }, { "epoch": 0.5270112987268788, "grad_norm": 1.8713817596435547, "learning_rate": 9.610951326199707e-05, "loss": 1.4675, "step": 14716 }, { "epoch": 0.5270471108564471, "grad_norm": 2.5871567726135254, "learning_rate": 9.609792305664984e-05, "loss": 1.8185, "step": 14717 }, { "epoch": 0.5270829229860153, "grad_norm": 1.4792441129684448, "learning_rate": 9.608633290380008e-05, "loss": 1.4989, "step": 14718 }, { "epoch": 0.5271187351155836, "grad_norm": 1.3156225681304932, "learning_rate": 9.60747428036036e-05, "loss": 1.3848, "step": 14719 }, { "epoch": 0.527154547245152, "grad_norm": 1.3840765953063965, "learning_rate": 9.606315275621644e-05, "loss": 1.5157, "step": 14720 }, { "epoch": 0.5271903593747203, "grad_norm": 1.6863317489624023, "learning_rate": 9.605156276179447e-05, "loss": 1.5742, "step": 14721 }, { "epoch": 0.5272261715042885, "grad_norm": 1.9276484251022339, "learning_rate": 9.60399728204936e-05, "loss": 1.7787, "step": 14722 }, { "epoch": 0.5272619836338568, "grad_norm": 1.3478091955184937, "learning_rate": 9.602838293246984e-05, "loss": 1.5074, "step": 14723 }, { "epoch": 0.5272977957634251, "grad_norm": 1.4894832372665405, "learning_rate": 9.6016793097879e-05, "loss": 1.3248, "step": 14724 }, { "epoch": 0.5273336078929933, "grad_norm": 1.655637502670288, "learning_rate": 9.600520331687713e-05, "loss": 1.6261, "step": 14725 }, { "epoch": 0.5273694200225616, "grad_norm": 1.4634572267532349, "learning_rate": 9.599361358962005e-05, "loss": 1.3915, "step": 14726 }, { "epoch": 0.52740523215213, "grad_norm": 1.8374460935592651, "learning_rate": 9.598202391626379e-05, "loss": 1.5723, "step": 14727 }, { "epoch": 0.5274410442816982, "grad_norm": 1.6577955484390259, "learning_rate": 9.597043429696413e-05, "loss": 1.5211, "step": 14728 }, { "epoch": 0.5274768564112665, "grad_norm": 1.743890404701233, "learning_rate": 9.595884473187716e-05, "loss": 1.6837, "step": 14729 }, { "epoch": 0.5275126685408348, "grad_norm": 1.3714507818222046, "learning_rate": 9.594725522115871e-05, "loss": 1.5259, "step": 14730 }, { "epoch": 0.527548480670403, "grad_norm": 1.3609662055969238, "learning_rate": 9.593566576496468e-05, "loss": 1.5633, "step": 14731 }, { "epoch": 0.5275842927999713, "grad_norm": 1.580083966255188, "learning_rate": 9.592407636345104e-05, "loss": 1.3267, "step": 14732 }, { "epoch": 0.5276201049295396, "grad_norm": 1.6475521326065063, "learning_rate": 9.591248701677368e-05, "loss": 1.6335, "step": 14733 }, { "epoch": 0.527655917059108, "grad_norm": 1.232351541519165, "learning_rate": 9.590089772508856e-05, "loss": 1.3328, "step": 14734 }, { "epoch": 0.5276917291886762, "grad_norm": 1.7131110429763794, "learning_rate": 9.588930848855152e-05, "loss": 1.8763, "step": 14735 }, { "epoch": 0.5277275413182445, "grad_norm": 1.3705060482025146, "learning_rate": 9.58777193073186e-05, "loss": 1.5272, "step": 14736 }, { "epoch": 0.5277633534478128, "grad_norm": 2.4891092777252197, "learning_rate": 9.58661301815456e-05, "loss": 1.6995, "step": 14737 }, { "epoch": 0.527799165577381, "grad_norm": 1.6959772109985352, "learning_rate": 9.585454111138853e-05, "loss": 1.2758, "step": 14738 }, { "epoch": 0.5278349777069493, "grad_norm": 1.470699667930603, "learning_rate": 9.584295209700326e-05, "loss": 1.6098, "step": 14739 }, { "epoch": 0.5278707898365176, "grad_norm": 2.051999092102051, "learning_rate": 9.583136313854567e-05, "loss": 1.627, "step": 14740 }, { "epoch": 0.527906601966086, "grad_norm": 1.461935043334961, "learning_rate": 9.581977423617173e-05, "loss": 1.0868, "step": 14741 }, { "epoch": 0.5279424140956542, "grad_norm": 1.76284921169281, "learning_rate": 9.580818539003733e-05, "loss": 1.3957, "step": 14742 }, { "epoch": 0.5279782262252225, "grad_norm": 1.7833982706069946, "learning_rate": 9.579659660029841e-05, "loss": 1.5588, "step": 14743 }, { "epoch": 0.5280140383547908, "grad_norm": 1.697553038597107, "learning_rate": 9.578500786711082e-05, "loss": 1.6178, "step": 14744 }, { "epoch": 0.528049850484359, "grad_norm": 2.319493055343628, "learning_rate": 9.577341919063055e-05, "loss": 1.2546, "step": 14745 }, { "epoch": 0.5280856626139273, "grad_norm": 1.524808406829834, "learning_rate": 9.576183057101345e-05, "loss": 1.2757, "step": 14746 }, { "epoch": 0.5281214747434956, "grad_norm": 1.9178670644760132, "learning_rate": 9.575024200841547e-05, "loss": 1.4959, "step": 14747 }, { "epoch": 0.528157286873064, "grad_norm": 2.4134249687194824, "learning_rate": 9.573865350299251e-05, "loss": 1.5957, "step": 14748 }, { "epoch": 0.5281930990026322, "grad_norm": 1.6927388906478882, "learning_rate": 9.572706505490043e-05, "loss": 1.6095, "step": 14749 }, { "epoch": 0.5282289111322005, "grad_norm": 1.5404099225997925, "learning_rate": 9.571547666429521e-05, "loss": 1.338, "step": 14750 }, { "epoch": 0.5282647232617688, "grad_norm": 1.3156019449234009, "learning_rate": 9.57038883313327e-05, "loss": 1.345, "step": 14751 }, { "epoch": 0.528300535391337, "grad_norm": 1.3669182062149048, "learning_rate": 9.569230005616887e-05, "loss": 1.6818, "step": 14752 }, { "epoch": 0.5283363475209053, "grad_norm": 1.1214356422424316, "learning_rate": 9.568071183895954e-05, "loss": 1.3239, "step": 14753 }, { "epoch": 0.5283721596504736, "grad_norm": 2.038661479949951, "learning_rate": 9.56691236798607e-05, "loss": 1.3369, "step": 14754 }, { "epoch": 0.528407971780042, "grad_norm": 2.319150686264038, "learning_rate": 9.565753557902818e-05, "loss": 1.7293, "step": 14755 }, { "epoch": 0.5284437839096102, "grad_norm": 1.3403654098510742, "learning_rate": 9.564594753661796e-05, "loss": 1.5201, "step": 14756 }, { "epoch": 0.5284795960391785, "grad_norm": 1.679591178894043, "learning_rate": 9.563435955278587e-05, "loss": 1.3043, "step": 14757 }, { "epoch": 0.5285154081687468, "grad_norm": 1.7903519868850708, "learning_rate": 9.562277162768785e-05, "loss": 1.5777, "step": 14758 }, { "epoch": 0.528551220298315, "grad_norm": 1.7412021160125732, "learning_rate": 9.561118376147979e-05, "loss": 1.7425, "step": 14759 }, { "epoch": 0.5285870324278833, "grad_norm": 1.7483241558074951, "learning_rate": 9.559959595431758e-05, "loss": 1.6111, "step": 14760 }, { "epoch": 0.5286228445574516, "grad_norm": 1.5660126209259033, "learning_rate": 9.558800820635715e-05, "loss": 1.5506, "step": 14761 }, { "epoch": 0.52865865668702, "grad_norm": 1.5264723300933838, "learning_rate": 9.557642051775436e-05, "loss": 1.4398, "step": 14762 }, { "epoch": 0.5286944688165882, "grad_norm": 1.40674889087677, "learning_rate": 9.556483288866515e-05, "loss": 1.2538, "step": 14763 }, { "epoch": 0.5287302809461565, "grad_norm": 1.4413470029830933, "learning_rate": 9.555324531924536e-05, "loss": 1.5525, "step": 14764 }, { "epoch": 0.5287660930757248, "grad_norm": 1.8058099746704102, "learning_rate": 9.554165780965095e-05, "loss": 1.6483, "step": 14765 }, { "epoch": 0.528801905205293, "grad_norm": 1.4641554355621338, "learning_rate": 9.553007036003777e-05, "loss": 1.4808, "step": 14766 }, { "epoch": 0.5288377173348613, "grad_norm": 1.9598422050476074, "learning_rate": 9.551848297056171e-05, "loss": 1.3693, "step": 14767 }, { "epoch": 0.5288735294644296, "grad_norm": 1.3409147262573242, "learning_rate": 9.550689564137872e-05, "loss": 1.4255, "step": 14768 }, { "epoch": 0.5289093415939979, "grad_norm": 1.9200905561447144, "learning_rate": 9.54953083726446e-05, "loss": 1.692, "step": 14769 }, { "epoch": 0.5289451537235662, "grad_norm": 2.1611762046813965, "learning_rate": 9.548372116451535e-05, "loss": 1.7349, "step": 14770 }, { "epoch": 0.5289809658531345, "grad_norm": 1.589970350265503, "learning_rate": 9.547213401714677e-05, "loss": 1.4199, "step": 14771 }, { "epoch": 0.5290167779827027, "grad_norm": 1.5894039869308472, "learning_rate": 9.546054693069481e-05, "loss": 1.5671, "step": 14772 }, { "epoch": 0.529052590112271, "grad_norm": 1.421728253364563, "learning_rate": 9.544895990531532e-05, "loss": 1.4939, "step": 14773 }, { "epoch": 0.5290884022418393, "grad_norm": 1.5457122325897217, "learning_rate": 9.54373729411642e-05, "loss": 1.363, "step": 14774 }, { "epoch": 0.5291242143714076, "grad_norm": 2.2558932304382324, "learning_rate": 9.542578603839736e-05, "loss": 1.6362, "step": 14775 }, { "epoch": 0.5291600265009759, "grad_norm": 2.0703535079956055, "learning_rate": 9.541419919717064e-05, "loss": 1.2543, "step": 14776 }, { "epoch": 0.5291958386305442, "grad_norm": 1.567732810974121, "learning_rate": 9.540261241763999e-05, "loss": 1.497, "step": 14777 }, { "epoch": 0.5292316507601125, "grad_norm": 1.5150243043899536, "learning_rate": 9.539102569996124e-05, "loss": 1.4928, "step": 14778 }, { "epoch": 0.5292674628896807, "grad_norm": 1.4762626886367798, "learning_rate": 9.53794390442903e-05, "loss": 1.4142, "step": 14779 }, { "epoch": 0.529303275019249, "grad_norm": 1.325579285621643, "learning_rate": 9.536785245078304e-05, "loss": 1.1916, "step": 14780 }, { "epoch": 0.5293390871488173, "grad_norm": 1.704073429107666, "learning_rate": 9.535626591959536e-05, "loss": 1.7114, "step": 14781 }, { "epoch": 0.5293748992783855, "grad_norm": 1.6421252489089966, "learning_rate": 9.534467945088313e-05, "loss": 1.4885, "step": 14782 }, { "epoch": 0.5294107114079539, "grad_norm": 1.4458218812942505, "learning_rate": 9.533309304480221e-05, "loss": 1.5306, "step": 14783 }, { "epoch": 0.5294465235375222, "grad_norm": 1.281543254852295, "learning_rate": 9.532150670150854e-05, "loss": 1.3572, "step": 14784 }, { "epoch": 0.5294823356670905, "grad_norm": 2.6501762866973877, "learning_rate": 9.530992042115794e-05, "loss": 1.604, "step": 14785 }, { "epoch": 0.5295181477966587, "grad_norm": 1.6431045532226562, "learning_rate": 9.529833420390631e-05, "loss": 1.6053, "step": 14786 }, { "epoch": 0.529553959926227, "grad_norm": 1.435895323753357, "learning_rate": 9.528674804990954e-05, "loss": 1.4593, "step": 14787 }, { "epoch": 0.5295897720557953, "grad_norm": 1.460228443145752, "learning_rate": 9.527516195932349e-05, "loss": 1.246, "step": 14788 }, { "epoch": 0.5296255841853635, "grad_norm": 1.7602965831756592, "learning_rate": 9.526357593230403e-05, "loss": 1.6471, "step": 14789 }, { "epoch": 0.5296613963149319, "grad_norm": 2.1996421813964844, "learning_rate": 9.525198996900707e-05, "loss": 1.6656, "step": 14790 }, { "epoch": 0.5296972084445002, "grad_norm": 3.2415080070495605, "learning_rate": 9.524040406958847e-05, "loss": 1.6736, "step": 14791 }, { "epoch": 0.5297330205740685, "grad_norm": 1.3910014629364014, "learning_rate": 9.522881823420404e-05, "loss": 1.4791, "step": 14792 }, { "epoch": 0.5297688327036367, "grad_norm": 1.5591599941253662, "learning_rate": 9.521723246300977e-05, "loss": 1.5115, "step": 14793 }, { "epoch": 0.529804644833205, "grad_norm": 1.4825100898742676, "learning_rate": 9.520564675616141e-05, "loss": 1.675, "step": 14794 }, { "epoch": 0.5298404569627733, "grad_norm": 1.701743245124817, "learning_rate": 9.519406111381492e-05, "loss": 1.409, "step": 14795 }, { "epoch": 0.5298762690923415, "grad_norm": 1.5088096857070923, "learning_rate": 9.518247553612613e-05, "loss": 1.2716, "step": 14796 }, { "epoch": 0.5299120812219099, "grad_norm": 1.9061812162399292, "learning_rate": 9.517089002325093e-05, "loss": 1.8451, "step": 14797 }, { "epoch": 0.5299478933514782, "grad_norm": 1.4009358882904053, "learning_rate": 9.515930457534514e-05, "loss": 1.1994, "step": 14798 }, { "epoch": 0.5299837054810465, "grad_norm": 1.6844242811203003, "learning_rate": 9.514771919256472e-05, "loss": 1.4261, "step": 14799 }, { "epoch": 0.5300195176106147, "grad_norm": 1.8280789852142334, "learning_rate": 9.513613387506547e-05, "loss": 1.5539, "step": 14800 }, { "epoch": 0.530055329740183, "grad_norm": 1.3354803323745728, "learning_rate": 9.512454862300321e-05, "loss": 1.4928, "step": 14801 }, { "epoch": 0.5300911418697513, "grad_norm": 1.5250039100646973, "learning_rate": 9.511296343653391e-05, "loss": 1.094, "step": 14802 }, { "epoch": 0.5301269539993195, "grad_norm": 2.810237169265747, "learning_rate": 9.510137831581334e-05, "loss": 1.7582, "step": 14803 }, { "epoch": 0.5301627661288879, "grad_norm": 1.547545075416565, "learning_rate": 9.508979326099747e-05, "loss": 1.0475, "step": 14804 }, { "epoch": 0.5301985782584562, "grad_norm": 2.176013469696045, "learning_rate": 9.507820827224202e-05, "loss": 1.4733, "step": 14805 }, { "epoch": 0.5302343903880244, "grad_norm": 1.2221146821975708, "learning_rate": 9.5066623349703e-05, "loss": 1.1236, "step": 14806 }, { "epoch": 0.5302702025175927, "grad_norm": 1.900315523147583, "learning_rate": 9.505503849353613e-05, "loss": 1.6962, "step": 14807 }, { "epoch": 0.530306014647161, "grad_norm": 1.854783296585083, "learning_rate": 9.504345370389739e-05, "loss": 1.5795, "step": 14808 }, { "epoch": 0.5303418267767293, "grad_norm": 1.4548566341400146, "learning_rate": 9.50318689809426e-05, "loss": 1.5435, "step": 14809 }, { "epoch": 0.5303776389062975, "grad_norm": 1.689694881439209, "learning_rate": 9.502028432482755e-05, "loss": 1.5173, "step": 14810 }, { "epoch": 0.5304134510358659, "grad_norm": 1.620489478111267, "learning_rate": 9.50086997357082e-05, "loss": 1.2642, "step": 14811 }, { "epoch": 0.5304492631654342, "grad_norm": 2.3089258670806885, "learning_rate": 9.499711521374031e-05, "loss": 1.5737, "step": 14812 }, { "epoch": 0.5304850752950024, "grad_norm": 2.3097901344299316, "learning_rate": 9.498553075907985e-05, "loss": 1.688, "step": 14813 }, { "epoch": 0.5305208874245707, "grad_norm": 1.4224505424499512, "learning_rate": 9.497394637188251e-05, "loss": 1.4562, "step": 14814 }, { "epoch": 0.530556699554139, "grad_norm": 1.8305726051330566, "learning_rate": 9.496236205230433e-05, "loss": 1.4744, "step": 14815 }, { "epoch": 0.5305925116837072, "grad_norm": 1.4154525995254517, "learning_rate": 9.4950777800501e-05, "loss": 1.4215, "step": 14816 }, { "epoch": 0.5306283238132755, "grad_norm": 1.8109185695648193, "learning_rate": 9.49391936166285e-05, "loss": 1.3802, "step": 14817 }, { "epoch": 0.5306641359428439, "grad_norm": 1.5938786268234253, "learning_rate": 9.492760950084261e-05, "loss": 1.4896, "step": 14818 }, { "epoch": 0.5306999480724122, "grad_norm": 1.4196759462356567, "learning_rate": 9.491602545329916e-05, "loss": 1.4166, "step": 14819 }, { "epoch": 0.5307357602019804, "grad_norm": 1.5144906044006348, "learning_rate": 9.490444147415407e-05, "loss": 1.3838, "step": 14820 }, { "epoch": 0.5307715723315487, "grad_norm": 2.2972259521484375, "learning_rate": 9.489285756356307e-05, "loss": 1.5379, "step": 14821 }, { "epoch": 0.530807384461117, "grad_norm": 1.4997377395629883, "learning_rate": 9.488127372168218e-05, "loss": 1.4986, "step": 14822 }, { "epoch": 0.5308431965906852, "grad_norm": 2.4222404956817627, "learning_rate": 9.486968994866708e-05, "loss": 1.652, "step": 14823 }, { "epoch": 0.5308790087202535, "grad_norm": 1.8816639184951782, "learning_rate": 9.485810624467372e-05, "loss": 1.3654, "step": 14824 }, { "epoch": 0.5309148208498219, "grad_norm": 1.8047064542770386, "learning_rate": 9.484652260985787e-05, "loss": 1.4498, "step": 14825 }, { "epoch": 0.5309506329793902, "grad_norm": 1.329789638519287, "learning_rate": 9.483493904437548e-05, "loss": 1.3135, "step": 14826 }, { "epoch": 0.5309864451089584, "grad_norm": 3.3496294021606445, "learning_rate": 9.482335554838229e-05, "loss": 1.1479, "step": 14827 }, { "epoch": 0.5310222572385267, "grad_norm": 1.7045812606811523, "learning_rate": 9.481177212203415e-05, "loss": 1.8932, "step": 14828 }, { "epoch": 0.531058069368095, "grad_norm": 1.3624588251113892, "learning_rate": 9.480018876548695e-05, "loss": 1.4705, "step": 14829 }, { "epoch": 0.5310938814976632, "grad_norm": 1.6676069498062134, "learning_rate": 9.478860547889647e-05, "loss": 1.5551, "step": 14830 }, { "epoch": 0.5311296936272315, "grad_norm": 1.7879300117492676, "learning_rate": 9.477702226241862e-05, "loss": 1.5219, "step": 14831 }, { "epoch": 0.5311655057567999, "grad_norm": 1.5257636308670044, "learning_rate": 9.476543911620918e-05, "loss": 1.5649, "step": 14832 }, { "epoch": 0.5312013178863682, "grad_norm": 2.001162052154541, "learning_rate": 9.4753856040424e-05, "loss": 1.549, "step": 14833 }, { "epoch": 0.5312371300159364, "grad_norm": 2.1974189281463623, "learning_rate": 9.47422730352189e-05, "loss": 1.283, "step": 14834 }, { "epoch": 0.5312729421455047, "grad_norm": 1.6734133958816528, "learning_rate": 9.47306901007498e-05, "loss": 1.4485, "step": 14835 }, { "epoch": 0.531308754275073, "grad_norm": 1.9478635787963867, "learning_rate": 9.471910723717243e-05, "loss": 1.4408, "step": 14836 }, { "epoch": 0.5313445664046412, "grad_norm": 2.0421502590179443, "learning_rate": 9.470752444464265e-05, "loss": 1.6513, "step": 14837 }, { "epoch": 0.5313803785342095, "grad_norm": 1.3936954736709595, "learning_rate": 9.469594172331631e-05, "loss": 1.3181, "step": 14838 }, { "epoch": 0.5314161906637779, "grad_norm": 1.9355803728103638, "learning_rate": 9.468435907334922e-05, "loss": 1.3821, "step": 14839 }, { "epoch": 0.5314520027933461, "grad_norm": 1.3044540882110596, "learning_rate": 9.467277649489725e-05, "loss": 1.381, "step": 14840 }, { "epoch": 0.5314878149229144, "grad_norm": 1.856377124786377, "learning_rate": 9.466119398811617e-05, "loss": 1.5137, "step": 14841 }, { "epoch": 0.5315236270524827, "grad_norm": 1.3732273578643799, "learning_rate": 9.464961155316187e-05, "loss": 1.4637, "step": 14842 }, { "epoch": 0.531559439182051, "grad_norm": 1.383651852607727, "learning_rate": 9.463802919019011e-05, "loss": 1.2581, "step": 14843 }, { "epoch": 0.5315952513116192, "grad_norm": 2.1127378940582275, "learning_rate": 9.462644689935678e-05, "loss": 1.6943, "step": 14844 }, { "epoch": 0.5316310634411875, "grad_norm": 1.6620867252349854, "learning_rate": 9.461486468081768e-05, "loss": 1.5948, "step": 14845 }, { "epoch": 0.5316668755707559, "grad_norm": 1.6653188467025757, "learning_rate": 9.460328253472859e-05, "loss": 1.7102, "step": 14846 }, { "epoch": 0.5317026877003241, "grad_norm": 1.5845062732696533, "learning_rate": 9.459170046124542e-05, "loss": 1.6149, "step": 14847 }, { "epoch": 0.5317384998298924, "grad_norm": 1.3799561262130737, "learning_rate": 9.458011846052391e-05, "loss": 1.5361, "step": 14848 }, { "epoch": 0.5317743119594607, "grad_norm": 2.455580949783325, "learning_rate": 9.456853653271992e-05, "loss": 1.7296, "step": 14849 }, { "epoch": 0.531810124089029, "grad_norm": 1.4650472402572632, "learning_rate": 9.455695467798927e-05, "loss": 1.4852, "step": 14850 }, { "epoch": 0.5318459362185972, "grad_norm": 1.7289412021636963, "learning_rate": 9.454537289648779e-05, "loss": 1.44, "step": 14851 }, { "epoch": 0.5318817483481655, "grad_norm": 1.5133506059646606, "learning_rate": 9.453379118837125e-05, "loss": 1.3471, "step": 14852 }, { "epoch": 0.5319175604777338, "grad_norm": 1.7192788124084473, "learning_rate": 9.452220955379553e-05, "loss": 1.6764, "step": 14853 }, { "epoch": 0.5319533726073021, "grad_norm": 2.162766456604004, "learning_rate": 9.45106279929164e-05, "loss": 1.3744, "step": 14854 }, { "epoch": 0.5319891847368704, "grad_norm": 1.7734508514404297, "learning_rate": 9.449904650588968e-05, "loss": 1.3988, "step": 14855 }, { "epoch": 0.5320249968664387, "grad_norm": 1.7432608604431152, "learning_rate": 9.44874650928712e-05, "loss": 1.2843, "step": 14856 }, { "epoch": 0.5320608089960069, "grad_norm": 1.3881850242614746, "learning_rate": 9.447588375401676e-05, "loss": 1.3026, "step": 14857 }, { "epoch": 0.5320966211255752, "grad_norm": 1.451550006866455, "learning_rate": 9.44643024894822e-05, "loss": 1.2257, "step": 14858 }, { "epoch": 0.5321324332551435, "grad_norm": 1.406640648841858, "learning_rate": 9.445272129942329e-05, "loss": 1.3289, "step": 14859 }, { "epoch": 0.5321682453847117, "grad_norm": 1.8054431676864624, "learning_rate": 9.444114018399588e-05, "loss": 1.4196, "step": 14860 }, { "epoch": 0.5322040575142801, "grad_norm": 1.7391663789749146, "learning_rate": 9.442955914335573e-05, "loss": 1.5092, "step": 14861 }, { "epoch": 0.5322398696438484, "grad_norm": 1.8281937837600708, "learning_rate": 9.441797817765869e-05, "loss": 1.2757, "step": 14862 }, { "epoch": 0.5322756817734167, "grad_norm": 1.541002869606018, "learning_rate": 9.440639728706058e-05, "loss": 1.2199, "step": 14863 }, { "epoch": 0.5323114939029849, "grad_norm": 1.8189629316329956, "learning_rate": 9.439481647171714e-05, "loss": 1.6036, "step": 14864 }, { "epoch": 0.5323473060325532, "grad_norm": 1.3375712633132935, "learning_rate": 9.438323573178424e-05, "loss": 1.4157, "step": 14865 }, { "epoch": 0.5323831181621215, "grad_norm": 2.0243418216705322, "learning_rate": 9.437165506741764e-05, "loss": 1.5781, "step": 14866 }, { "epoch": 0.5324189302916897, "grad_norm": 1.824872612953186, "learning_rate": 9.436007447877316e-05, "loss": 1.1952, "step": 14867 }, { "epoch": 0.5324547424212581, "grad_norm": 1.589608073234558, "learning_rate": 9.43484939660066e-05, "loss": 1.1883, "step": 14868 }, { "epoch": 0.5324905545508264, "grad_norm": 1.530319333076477, "learning_rate": 9.433691352927378e-05, "loss": 1.3574, "step": 14869 }, { "epoch": 0.5325263666803947, "grad_norm": 1.7386776208877563, "learning_rate": 9.43253331687305e-05, "loss": 1.5034, "step": 14870 }, { "epoch": 0.5325621788099629, "grad_norm": 1.9396528005599976, "learning_rate": 9.43137528845325e-05, "loss": 1.5202, "step": 14871 }, { "epoch": 0.5325979909395312, "grad_norm": 1.8094663619995117, "learning_rate": 9.430217267683566e-05, "loss": 1.7822, "step": 14872 }, { "epoch": 0.5326338030690995, "grad_norm": 1.1985933780670166, "learning_rate": 9.42905925457957e-05, "loss": 1.4856, "step": 14873 }, { "epoch": 0.5326696151986677, "grad_norm": 1.9986737966537476, "learning_rate": 9.427901249156847e-05, "loss": 1.5435, "step": 14874 }, { "epoch": 0.5327054273282361, "grad_norm": 2.1009483337402344, "learning_rate": 9.426743251430974e-05, "loss": 1.5721, "step": 14875 }, { "epoch": 0.5327412394578044, "grad_norm": 1.6523008346557617, "learning_rate": 9.425585261417533e-05, "loss": 1.4228, "step": 14876 }, { "epoch": 0.5327770515873727, "grad_norm": 1.8854312896728516, "learning_rate": 9.424427279132099e-05, "loss": 1.4939, "step": 14877 }, { "epoch": 0.5328128637169409, "grad_norm": 2.156968355178833, "learning_rate": 9.423269304590256e-05, "loss": 0.997, "step": 14878 }, { "epoch": 0.5328486758465092, "grad_norm": 1.7135142087936401, "learning_rate": 9.42211133780758e-05, "loss": 1.5855, "step": 14879 }, { "epoch": 0.5328844879760775, "grad_norm": 1.341581106185913, "learning_rate": 9.420953378799649e-05, "loss": 1.3719, "step": 14880 }, { "epoch": 0.5329203001056457, "grad_norm": 1.8618860244750977, "learning_rate": 9.419795427582044e-05, "loss": 1.3623, "step": 14881 }, { "epoch": 0.5329561122352141, "grad_norm": 2.2615368366241455, "learning_rate": 9.418637484170344e-05, "loss": 1.4273, "step": 14882 }, { "epoch": 0.5329919243647824, "grad_norm": 1.9147205352783203, "learning_rate": 9.417479548580126e-05, "loss": 1.5137, "step": 14883 }, { "epoch": 0.5330277364943506, "grad_norm": 1.4778187274932861, "learning_rate": 9.416321620826968e-05, "loss": 1.8574, "step": 14884 }, { "epoch": 0.5330635486239189, "grad_norm": 1.726284384727478, "learning_rate": 9.415163700926451e-05, "loss": 1.362, "step": 14885 }, { "epoch": 0.5330993607534872, "grad_norm": 2.3607828617095947, "learning_rate": 9.414005788894151e-05, "loss": 1.7774, "step": 14886 }, { "epoch": 0.5331351728830555, "grad_norm": 1.7209479808807373, "learning_rate": 9.41284788474565e-05, "loss": 1.3326, "step": 14887 }, { "epoch": 0.5331709850126237, "grad_norm": 1.455073356628418, "learning_rate": 9.411689988496526e-05, "loss": 1.7173, "step": 14888 }, { "epoch": 0.5332067971421921, "grad_norm": 1.9332654476165771, "learning_rate": 9.410532100162344e-05, "loss": 1.4577, "step": 14889 }, { "epoch": 0.5332426092717604, "grad_norm": 1.50914466381073, "learning_rate": 9.409374219758702e-05, "loss": 1.2237, "step": 14890 }, { "epoch": 0.5332784214013286, "grad_norm": 1.5229016542434692, "learning_rate": 9.408216347301161e-05, "loss": 1.5624, "step": 14891 }, { "epoch": 0.5333142335308969, "grad_norm": 2.082638740539551, "learning_rate": 9.40705848280531e-05, "loss": 1.3889, "step": 14892 }, { "epoch": 0.5333500456604652, "grad_norm": 1.3961759805679321, "learning_rate": 9.40590062628672e-05, "loss": 1.5499, "step": 14893 }, { "epoch": 0.5333858577900334, "grad_norm": 1.6336990594863892, "learning_rate": 9.404742777760974e-05, "loss": 1.3981, "step": 14894 }, { "epoch": 0.5334216699196017, "grad_norm": 1.8657147884368896, "learning_rate": 9.403584937243642e-05, "loss": 1.5618, "step": 14895 }, { "epoch": 0.5334574820491701, "grad_norm": 2.1878414154052734, "learning_rate": 9.402427104750308e-05, "loss": 1.4487, "step": 14896 }, { "epoch": 0.5334932941787384, "grad_norm": 1.487822413444519, "learning_rate": 9.401269280296549e-05, "loss": 1.2005, "step": 14897 }, { "epoch": 0.5335291063083066, "grad_norm": 1.6004787683486938, "learning_rate": 9.400111463897932e-05, "loss": 1.5498, "step": 14898 }, { "epoch": 0.5335649184378749, "grad_norm": 1.8500304222106934, "learning_rate": 9.39895365557005e-05, "loss": 1.4665, "step": 14899 }, { "epoch": 0.5336007305674432, "grad_norm": 1.6620436906814575, "learning_rate": 9.397795855328464e-05, "loss": 1.6153, "step": 14900 }, { "epoch": 0.5336365426970114, "grad_norm": 1.6935945749282837, "learning_rate": 9.396638063188764e-05, "loss": 1.6425, "step": 14901 }, { "epoch": 0.5336723548265797, "grad_norm": 1.664432168006897, "learning_rate": 9.395480279166514e-05, "loss": 1.6335, "step": 14902 }, { "epoch": 0.5337081669561481, "grad_norm": 2.122021436691284, "learning_rate": 9.394322503277305e-05, "loss": 1.3633, "step": 14903 }, { "epoch": 0.5337439790857164, "grad_norm": 1.5068094730377197, "learning_rate": 9.393164735536696e-05, "loss": 1.3178, "step": 14904 }, { "epoch": 0.5337797912152846, "grad_norm": 1.499644160270691, "learning_rate": 9.39200697596028e-05, "loss": 1.3545, "step": 14905 }, { "epoch": 0.5338156033448529, "grad_norm": 1.8724576234817505, "learning_rate": 9.390849224563627e-05, "loss": 1.5587, "step": 14906 }, { "epoch": 0.5338514154744212, "grad_norm": 2.1973133087158203, "learning_rate": 9.389691481362304e-05, "loss": 1.4083, "step": 14907 }, { "epoch": 0.5338872276039894, "grad_norm": 2.883030414581299, "learning_rate": 9.388533746371904e-05, "loss": 1.3479, "step": 14908 }, { "epoch": 0.5339230397335577, "grad_norm": 2.0845448970794678, "learning_rate": 9.387376019607985e-05, "loss": 1.248, "step": 14909 }, { "epoch": 0.5339588518631261, "grad_norm": 1.4338700771331787, "learning_rate": 9.386218301086139e-05, "loss": 1.3079, "step": 14910 }, { "epoch": 0.5339946639926944, "grad_norm": 2.109936475753784, "learning_rate": 9.385060590821929e-05, "loss": 1.4467, "step": 14911 }, { "epoch": 0.5340304761222626, "grad_norm": 1.9435588121414185, "learning_rate": 9.38390288883094e-05, "loss": 1.5993, "step": 14912 }, { "epoch": 0.5340662882518309, "grad_norm": 1.5473748445510864, "learning_rate": 9.382745195128736e-05, "loss": 1.3432, "step": 14913 }, { "epoch": 0.5341021003813992, "grad_norm": 1.929419994354248, "learning_rate": 9.381587509730907e-05, "loss": 1.627, "step": 14914 }, { "epoch": 0.5341379125109674, "grad_norm": 1.4407709836959839, "learning_rate": 9.380429832653017e-05, "loss": 1.2589, "step": 14915 }, { "epoch": 0.5341737246405357, "grad_norm": 1.594947099685669, "learning_rate": 9.379272163910643e-05, "loss": 1.3406, "step": 14916 }, { "epoch": 0.5342095367701041, "grad_norm": 1.8034594058990479, "learning_rate": 9.378114503519364e-05, "loss": 1.2251, "step": 14917 }, { "epoch": 0.5342453488996723, "grad_norm": 1.5109189748764038, "learning_rate": 9.376956851494747e-05, "loss": 1.5412, "step": 14918 }, { "epoch": 0.5342811610292406, "grad_norm": 1.4823503494262695, "learning_rate": 9.375799207852379e-05, "loss": 1.5642, "step": 14919 }, { "epoch": 0.5343169731588089, "grad_norm": 1.2817720174789429, "learning_rate": 9.37464157260782e-05, "loss": 1.4638, "step": 14920 }, { "epoch": 0.5343527852883772, "grad_norm": 1.9059113264083862, "learning_rate": 9.37348394577666e-05, "loss": 1.6068, "step": 14921 }, { "epoch": 0.5343885974179454, "grad_norm": 1.876328945159912, "learning_rate": 9.372326327374459e-05, "loss": 1.377, "step": 14922 }, { "epoch": 0.5344244095475137, "grad_norm": 2.0040924549102783, "learning_rate": 9.371168717416803e-05, "loss": 1.3396, "step": 14923 }, { "epoch": 0.5344602216770821, "grad_norm": 2.007840156555176, "learning_rate": 9.370011115919258e-05, "loss": 1.6018, "step": 14924 }, { "epoch": 0.5344960338066503, "grad_norm": 1.4200763702392578, "learning_rate": 9.368853522897399e-05, "loss": 1.435, "step": 14925 }, { "epoch": 0.5345318459362186, "grad_norm": 1.4511983394622803, "learning_rate": 9.367695938366805e-05, "loss": 1.2733, "step": 14926 }, { "epoch": 0.5345676580657869, "grad_norm": 1.3073828220367432, "learning_rate": 9.366538362343043e-05, "loss": 1.504, "step": 14927 }, { "epoch": 0.5346034701953551, "grad_norm": 1.3790490627288818, "learning_rate": 9.365380794841694e-05, "loss": 1.4996, "step": 14928 }, { "epoch": 0.5346392823249234, "grad_norm": 2.486955165863037, "learning_rate": 9.364223235878324e-05, "loss": 1.5752, "step": 14929 }, { "epoch": 0.5346750944544917, "grad_norm": 2.4846272468566895, "learning_rate": 9.363065685468514e-05, "loss": 1.585, "step": 14930 }, { "epoch": 0.5347109065840601, "grad_norm": 1.5754536390304565, "learning_rate": 9.361908143627829e-05, "loss": 1.3481, "step": 14931 }, { "epoch": 0.5347467187136283, "grad_norm": 1.701556921005249, "learning_rate": 9.360750610371852e-05, "loss": 2.0244, "step": 14932 }, { "epoch": 0.5347825308431966, "grad_norm": 1.8549612760543823, "learning_rate": 9.35959308571615e-05, "loss": 1.4935, "step": 14933 }, { "epoch": 0.5348183429727649, "grad_norm": 1.695146918296814, "learning_rate": 9.358435569676295e-05, "loss": 1.3858, "step": 14934 }, { "epoch": 0.5348541551023331, "grad_norm": 1.631511926651001, "learning_rate": 9.357278062267863e-05, "loss": 1.59, "step": 14935 }, { "epoch": 0.5348899672319014, "grad_norm": 2.117126703262329, "learning_rate": 9.356120563506424e-05, "loss": 1.409, "step": 14936 }, { "epoch": 0.5349257793614697, "grad_norm": 1.4399936199188232, "learning_rate": 9.354963073407555e-05, "loss": 1.6277, "step": 14937 }, { "epoch": 0.5349615914910381, "grad_norm": 1.9248534440994263, "learning_rate": 9.353805591986822e-05, "loss": 1.2877, "step": 14938 }, { "epoch": 0.5349974036206063, "grad_norm": 1.6802445650100708, "learning_rate": 9.352648119259804e-05, "loss": 1.3423, "step": 14939 }, { "epoch": 0.5350332157501746, "grad_norm": 1.4071083068847656, "learning_rate": 9.35149065524207e-05, "loss": 1.1726, "step": 14940 }, { "epoch": 0.5350690278797429, "grad_norm": 2.1739704608917236, "learning_rate": 9.350333199949193e-05, "loss": 1.7501, "step": 14941 }, { "epoch": 0.5351048400093111, "grad_norm": 2.068229913711548, "learning_rate": 9.349175753396746e-05, "loss": 1.7553, "step": 14942 }, { "epoch": 0.5351406521388794, "grad_norm": 2.328054666519165, "learning_rate": 9.348018315600297e-05, "loss": 1.5384, "step": 14943 }, { "epoch": 0.5351764642684477, "grad_norm": 1.6056578159332275, "learning_rate": 9.346860886575422e-05, "loss": 1.6585, "step": 14944 }, { "epoch": 0.535212276398016, "grad_norm": 1.6351563930511475, "learning_rate": 9.34570346633769e-05, "loss": 1.5569, "step": 14945 }, { "epoch": 0.5352480885275843, "grad_norm": 1.8765473365783691, "learning_rate": 9.344546054902677e-05, "loss": 1.6506, "step": 14946 }, { "epoch": 0.5352839006571526, "grad_norm": 2.3817811012268066, "learning_rate": 9.343388652285947e-05, "loss": 1.5692, "step": 14947 }, { "epoch": 0.5353197127867209, "grad_norm": 2.1171789169311523, "learning_rate": 9.342231258503079e-05, "loss": 1.9199, "step": 14948 }, { "epoch": 0.5353555249162891, "grad_norm": 1.4918116331100464, "learning_rate": 9.34107387356964e-05, "loss": 1.3828, "step": 14949 }, { "epoch": 0.5353913370458574, "grad_norm": 1.5187290906906128, "learning_rate": 9.339916497501202e-05, "loss": 1.5775, "step": 14950 }, { "epoch": 0.5354271491754257, "grad_norm": 1.756986379623413, "learning_rate": 9.338759130313338e-05, "loss": 1.7123, "step": 14951 }, { "epoch": 0.535462961304994, "grad_norm": 1.6845223903656006, "learning_rate": 9.337601772021612e-05, "loss": 1.1958, "step": 14952 }, { "epoch": 0.5354987734345623, "grad_norm": 1.5527392625808716, "learning_rate": 9.336444422641605e-05, "loss": 1.4844, "step": 14953 }, { "epoch": 0.5355345855641306, "grad_norm": 1.50359046459198, "learning_rate": 9.335287082188878e-05, "loss": 1.5718, "step": 14954 }, { "epoch": 0.5355703976936989, "grad_norm": 1.854921579360962, "learning_rate": 9.334129750679009e-05, "loss": 1.6565, "step": 14955 }, { "epoch": 0.5356062098232671, "grad_norm": 2.034636974334717, "learning_rate": 9.332972428127563e-05, "loss": 1.3642, "step": 14956 }, { "epoch": 0.5356420219528354, "grad_norm": 1.7532157897949219, "learning_rate": 9.331815114550115e-05, "loss": 1.5281, "step": 14957 }, { "epoch": 0.5356778340824037, "grad_norm": 1.6303654909133911, "learning_rate": 9.330657809962231e-05, "loss": 1.2697, "step": 14958 }, { "epoch": 0.535713646211972, "grad_norm": 1.6121984720230103, "learning_rate": 9.329500514379485e-05, "loss": 1.4018, "step": 14959 }, { "epoch": 0.5357494583415403, "grad_norm": 1.9481133222579956, "learning_rate": 9.328343227817443e-05, "loss": 1.8712, "step": 14960 }, { "epoch": 0.5357852704711086, "grad_norm": 2.3141913414001465, "learning_rate": 9.327185950291676e-05, "loss": 1.7397, "step": 14961 }, { "epoch": 0.5358210826006768, "grad_norm": 2.181881904602051, "learning_rate": 9.326028681817755e-05, "loss": 1.4634, "step": 14962 }, { "epoch": 0.5358568947302451, "grad_norm": 1.6960126161575317, "learning_rate": 9.324871422411248e-05, "loss": 1.4148, "step": 14963 }, { "epoch": 0.5358927068598134, "grad_norm": 1.1461007595062256, "learning_rate": 9.323714172087726e-05, "loss": 1.4833, "step": 14964 }, { "epoch": 0.5359285189893817, "grad_norm": 1.4815764427185059, "learning_rate": 9.322556930862757e-05, "loss": 1.3105, "step": 14965 }, { "epoch": 0.53596433111895, "grad_norm": 1.6154530048370361, "learning_rate": 9.321399698751912e-05, "loss": 1.2409, "step": 14966 }, { "epoch": 0.5360001432485183, "grad_norm": 1.841649055480957, "learning_rate": 9.320242475770756e-05, "loss": 1.3561, "step": 14967 }, { "epoch": 0.5360359553780866, "grad_norm": 2.050635814666748, "learning_rate": 9.319085261934864e-05, "loss": 1.5387, "step": 14968 }, { "epoch": 0.5360717675076548, "grad_norm": 1.3458319902420044, "learning_rate": 9.317928057259799e-05, "loss": 1.5461, "step": 14969 }, { "epoch": 0.5361075796372231, "grad_norm": 1.581529974937439, "learning_rate": 9.316770861761132e-05, "loss": 1.6266, "step": 14970 }, { "epoch": 0.5361433917667914, "grad_norm": 1.65879487991333, "learning_rate": 9.315613675454435e-05, "loss": 1.7114, "step": 14971 }, { "epoch": 0.5361792038963596, "grad_norm": 1.7967455387115479, "learning_rate": 9.314456498355269e-05, "loss": 1.2497, "step": 14972 }, { "epoch": 0.536215016025928, "grad_norm": 1.6957311630249023, "learning_rate": 9.313299330479209e-05, "loss": 1.3297, "step": 14973 }, { "epoch": 0.5362508281554963, "grad_norm": 1.4996039867401123, "learning_rate": 9.31214217184182e-05, "loss": 1.5683, "step": 14974 }, { "epoch": 0.5362866402850646, "grad_norm": 1.9086555242538452, "learning_rate": 9.31098502245867e-05, "loss": 1.507, "step": 14975 }, { "epoch": 0.5363224524146328, "grad_norm": 1.6246367692947388, "learning_rate": 9.30982788234533e-05, "loss": 1.4698, "step": 14976 }, { "epoch": 0.5363582645442011, "grad_norm": 1.8579511642456055, "learning_rate": 9.308670751517363e-05, "loss": 1.4766, "step": 14977 }, { "epoch": 0.5363940766737694, "grad_norm": 2.250375270843506, "learning_rate": 9.307513629990342e-05, "loss": 1.3034, "step": 14978 }, { "epoch": 0.5364298888033376, "grad_norm": 1.5072667598724365, "learning_rate": 9.306356517779828e-05, "loss": 1.5313, "step": 14979 }, { "epoch": 0.536465700932906, "grad_norm": 3.7206661701202393, "learning_rate": 9.305199414901397e-05, "loss": 1.8558, "step": 14980 }, { "epoch": 0.5365015130624743, "grad_norm": 1.4382824897766113, "learning_rate": 9.304042321370607e-05, "loss": 1.4454, "step": 14981 }, { "epoch": 0.5365373251920426, "grad_norm": 2.4771111011505127, "learning_rate": 9.302885237203034e-05, "loss": 1.402, "step": 14982 }, { "epoch": 0.5365731373216108, "grad_norm": 1.8501386642456055, "learning_rate": 9.301728162414238e-05, "loss": 1.2341, "step": 14983 }, { "epoch": 0.5366089494511791, "grad_norm": 1.4732383489608765, "learning_rate": 9.30057109701979e-05, "loss": 1.5761, "step": 14984 }, { "epoch": 0.5366447615807474, "grad_norm": 2.0961062908172607, "learning_rate": 9.299414041035259e-05, "loss": 1.7277, "step": 14985 }, { "epoch": 0.5366805737103156, "grad_norm": 1.619267463684082, "learning_rate": 9.298256994476202e-05, "loss": 1.5175, "step": 14986 }, { "epoch": 0.536716385839884, "grad_norm": 1.4270780086517334, "learning_rate": 9.297099957358199e-05, "loss": 1.695, "step": 14987 }, { "epoch": 0.5367521979694523, "grad_norm": 1.7265422344207764, "learning_rate": 9.295942929696801e-05, "loss": 1.2141, "step": 14988 }, { "epoch": 0.5367880100990206, "grad_norm": 2.4905295372009277, "learning_rate": 9.294785911507589e-05, "loss": 1.3067, "step": 14989 }, { "epoch": 0.5368238222285888, "grad_norm": 1.9801867008209229, "learning_rate": 9.29362890280612e-05, "loss": 1.8363, "step": 14990 }, { "epoch": 0.5368596343581571, "grad_norm": 1.467184066772461, "learning_rate": 9.292471903607964e-05, "loss": 1.6563, "step": 14991 }, { "epoch": 0.5368954464877254, "grad_norm": 1.6070553064346313, "learning_rate": 9.291314913928685e-05, "loss": 1.35, "step": 14992 }, { "epoch": 0.5369312586172936, "grad_norm": 1.391000747680664, "learning_rate": 9.290157933783852e-05, "loss": 1.2004, "step": 14993 }, { "epoch": 0.536967070746862, "grad_norm": 1.8240994215011597, "learning_rate": 9.28900096318903e-05, "loss": 1.3666, "step": 14994 }, { "epoch": 0.5370028828764303, "grad_norm": 1.497835636138916, "learning_rate": 9.287844002159776e-05, "loss": 1.73, "step": 14995 }, { "epoch": 0.5370386950059985, "grad_norm": 1.8223732709884644, "learning_rate": 9.286687050711668e-05, "loss": 1.3505, "step": 14996 }, { "epoch": 0.5370745071355668, "grad_norm": 1.5214042663574219, "learning_rate": 9.285530108860262e-05, "loss": 1.5422, "step": 14997 }, { "epoch": 0.5371103192651351, "grad_norm": 1.55804443359375, "learning_rate": 9.284373176621131e-05, "loss": 1.5534, "step": 14998 }, { "epoch": 0.5371461313947034, "grad_norm": 2.063025712966919, "learning_rate": 9.28321625400983e-05, "loss": 1.4235, "step": 14999 }, { "epoch": 0.5371819435242716, "grad_norm": 1.5878913402557373, "learning_rate": 9.282059341041936e-05, "loss": 1.5036, "step": 15000 }, { "epoch": 0.53721775565384, "grad_norm": 1.506630778312683, "learning_rate": 9.280902437733003e-05, "loss": 1.208, "step": 15001 }, { "epoch": 0.5372535677834083, "grad_norm": 1.781821846961975, "learning_rate": 9.279745544098602e-05, "loss": 1.4643, "step": 15002 }, { "epoch": 0.5372893799129765, "grad_norm": 1.9320119619369507, "learning_rate": 9.278588660154298e-05, "loss": 1.6491, "step": 15003 }, { "epoch": 0.5373251920425448, "grad_norm": 1.846867322921753, "learning_rate": 9.277431785915647e-05, "loss": 1.3241, "step": 15004 }, { "epoch": 0.5373610041721131, "grad_norm": 1.7389922142028809, "learning_rate": 9.276274921398225e-05, "loss": 1.5716, "step": 15005 }, { "epoch": 0.5373968163016813, "grad_norm": 1.6828988790512085, "learning_rate": 9.275118066617585e-05, "loss": 1.5801, "step": 15006 }, { "epoch": 0.5374326284312496, "grad_norm": 1.8095980882644653, "learning_rate": 9.273961221589303e-05, "loss": 1.3198, "step": 15007 }, { "epoch": 0.537468440560818, "grad_norm": 1.7171796560287476, "learning_rate": 9.27280438632893e-05, "loss": 1.4818, "step": 15008 }, { "epoch": 0.5375042526903863, "grad_norm": 1.8513648509979248, "learning_rate": 9.271647560852042e-05, "loss": 1.6135, "step": 15009 }, { "epoch": 0.5375400648199545, "grad_norm": 1.5115734338760376, "learning_rate": 9.27049074517419e-05, "loss": 1.6142, "step": 15010 }, { "epoch": 0.5375758769495228, "grad_norm": 2.058396100997925, "learning_rate": 9.26933393931095e-05, "loss": 1.4059, "step": 15011 }, { "epoch": 0.5376116890790911, "grad_norm": 1.8577311038970947, "learning_rate": 9.268177143277877e-05, "loss": 1.5906, "step": 15012 }, { "epoch": 0.5376475012086593, "grad_norm": 1.5660582780838013, "learning_rate": 9.267020357090535e-05, "loss": 1.4381, "step": 15013 }, { "epoch": 0.5376833133382276, "grad_norm": 1.3787236213684082, "learning_rate": 9.265863580764492e-05, "loss": 1.4798, "step": 15014 }, { "epoch": 0.537719125467796, "grad_norm": 2.0579323768615723, "learning_rate": 9.264706814315302e-05, "loss": 1.571, "step": 15015 }, { "epoch": 0.5377549375973643, "grad_norm": 1.3962444067001343, "learning_rate": 9.263550057758539e-05, "loss": 1.59, "step": 15016 }, { "epoch": 0.5377907497269325, "grad_norm": 1.7206778526306152, "learning_rate": 9.262393311109754e-05, "loss": 1.3043, "step": 15017 }, { "epoch": 0.5378265618565008, "grad_norm": 1.2327122688293457, "learning_rate": 9.261236574384523e-05, "loss": 1.4594, "step": 15018 }, { "epoch": 0.5378623739860691, "grad_norm": 2.1515252590179443, "learning_rate": 9.260079847598393e-05, "loss": 1.4466, "step": 15019 }, { "epoch": 0.5378981861156373, "grad_norm": 1.5963287353515625, "learning_rate": 9.258923130766942e-05, "loss": 1.4193, "step": 15020 }, { "epoch": 0.5379339982452056, "grad_norm": 1.5895057916641235, "learning_rate": 9.257766423905722e-05, "loss": 1.724, "step": 15021 }, { "epoch": 0.537969810374774, "grad_norm": 1.5199759006500244, "learning_rate": 9.256609727030294e-05, "loss": 1.645, "step": 15022 }, { "epoch": 0.5380056225043423, "grad_norm": 1.6282600164413452, "learning_rate": 9.255453040156228e-05, "loss": 1.43, "step": 15023 }, { "epoch": 0.5380414346339105, "grad_norm": 1.6002341508865356, "learning_rate": 9.254296363299077e-05, "loss": 1.4412, "step": 15024 }, { "epoch": 0.5380772467634788, "grad_norm": 2.265721559524536, "learning_rate": 9.253139696474409e-05, "loss": 1.5447, "step": 15025 }, { "epoch": 0.5381130588930471, "grad_norm": 3.0661771297454834, "learning_rate": 9.25198303969778e-05, "loss": 1.6547, "step": 15026 }, { "epoch": 0.5381488710226153, "grad_norm": 2.4014828205108643, "learning_rate": 9.250826392984757e-05, "loss": 1.3629, "step": 15027 }, { "epoch": 0.5381846831521836, "grad_norm": 2.069535732269287, "learning_rate": 9.249669756350894e-05, "loss": 1.6487, "step": 15028 }, { "epoch": 0.538220495281752, "grad_norm": 1.3042799234390259, "learning_rate": 9.248513129811765e-05, "loss": 1.197, "step": 15029 }, { "epoch": 0.5382563074113202, "grad_norm": 1.619163990020752, "learning_rate": 9.247356513382917e-05, "loss": 1.5524, "step": 15030 }, { "epoch": 0.5382921195408885, "grad_norm": 1.8219351768493652, "learning_rate": 9.246199907079916e-05, "loss": 1.4046, "step": 15031 }, { "epoch": 0.5383279316704568, "grad_norm": 1.6462182998657227, "learning_rate": 9.245043310918325e-05, "loss": 1.5918, "step": 15032 }, { "epoch": 0.538363743800025, "grad_norm": 2.164386034011841, "learning_rate": 9.2438867249137e-05, "loss": 1.681, "step": 15033 }, { "epoch": 0.5383995559295933, "grad_norm": 1.3424277305603027, "learning_rate": 9.242730149081606e-05, "loss": 1.6123, "step": 15034 }, { "epoch": 0.5384353680591616, "grad_norm": 1.6956967115402222, "learning_rate": 9.241573583437599e-05, "loss": 1.6094, "step": 15035 }, { "epoch": 0.53847118018873, "grad_norm": 1.7979881763458252, "learning_rate": 9.240417027997243e-05, "loss": 1.4714, "step": 15036 }, { "epoch": 0.5385069923182982, "grad_norm": 1.8987326622009277, "learning_rate": 9.239260482776096e-05, "loss": 1.5237, "step": 15037 }, { "epoch": 0.5385428044478665, "grad_norm": 1.9217215776443481, "learning_rate": 9.238103947789718e-05, "loss": 1.458, "step": 15038 }, { "epoch": 0.5385786165774348, "grad_norm": 2.3088903427124023, "learning_rate": 9.236947423053669e-05, "loss": 1.6509, "step": 15039 }, { "epoch": 0.538614428707003, "grad_norm": 1.4139968156814575, "learning_rate": 9.235790908583506e-05, "loss": 1.3816, "step": 15040 }, { "epoch": 0.5386502408365713, "grad_norm": 2.1365206241607666, "learning_rate": 9.234634404394793e-05, "loss": 1.4619, "step": 15041 }, { "epoch": 0.5386860529661396, "grad_norm": 2.3481082916259766, "learning_rate": 9.233477910503083e-05, "loss": 1.5677, "step": 15042 }, { "epoch": 0.538721865095708, "grad_norm": 2.0316877365112305, "learning_rate": 9.232321426923943e-05, "loss": 1.3205, "step": 15043 }, { "epoch": 0.5387576772252762, "grad_norm": 1.9657565355300903, "learning_rate": 9.231164953672926e-05, "loss": 1.6539, "step": 15044 }, { "epoch": 0.5387934893548445, "grad_norm": 1.8171610832214355, "learning_rate": 9.230008490765593e-05, "loss": 1.1466, "step": 15045 }, { "epoch": 0.5388293014844128, "grad_norm": 1.1965806484222412, "learning_rate": 9.228852038217502e-05, "loss": 1.2931, "step": 15046 }, { "epoch": 0.538865113613981, "grad_norm": 1.6096956729888916, "learning_rate": 9.227695596044215e-05, "loss": 1.4272, "step": 15047 }, { "epoch": 0.5389009257435493, "grad_norm": 2.070911407470703, "learning_rate": 9.226539164261286e-05, "loss": 1.3034, "step": 15048 }, { "epoch": 0.5389367378731176, "grad_norm": 2.4568874835968018, "learning_rate": 9.225382742884273e-05, "loss": 1.4975, "step": 15049 }, { "epoch": 0.538972550002686, "grad_norm": 1.6348614692687988, "learning_rate": 9.224226331928738e-05, "loss": 1.5438, "step": 15050 }, { "epoch": 0.5390083621322542, "grad_norm": 1.6172714233398438, "learning_rate": 9.223069931410236e-05, "loss": 1.3941, "step": 15051 }, { "epoch": 0.5390441742618225, "grad_norm": 1.6091904640197754, "learning_rate": 9.221913541344327e-05, "loss": 1.4381, "step": 15052 }, { "epoch": 0.5390799863913908, "grad_norm": 1.614936351776123, "learning_rate": 9.220757161746566e-05, "loss": 1.5637, "step": 15053 }, { "epoch": 0.539115798520959, "grad_norm": 2.01474928855896, "learning_rate": 9.219600792632513e-05, "loss": 1.4325, "step": 15054 }, { "epoch": 0.5391516106505273, "grad_norm": 1.680156946182251, "learning_rate": 9.218444434017724e-05, "loss": 1.6222, "step": 15055 }, { "epoch": 0.5391874227800956, "grad_norm": 1.7810890674591064, "learning_rate": 9.217288085917759e-05, "loss": 1.4141, "step": 15056 }, { "epoch": 0.539223234909664, "grad_norm": 1.8147783279418945, "learning_rate": 9.216131748348174e-05, "loss": 1.6173, "step": 15057 }, { "epoch": 0.5392590470392322, "grad_norm": 1.4602004289627075, "learning_rate": 9.21497542132452e-05, "loss": 1.4772, "step": 15058 }, { "epoch": 0.5392948591688005, "grad_norm": 1.867448329925537, "learning_rate": 9.213819104862365e-05, "loss": 1.3885, "step": 15059 }, { "epoch": 0.5393306712983688, "grad_norm": 1.4362049102783203, "learning_rate": 9.212662798977256e-05, "loss": 1.5253, "step": 15060 }, { "epoch": 0.539366483427937, "grad_norm": 1.3346989154815674, "learning_rate": 9.211506503684755e-05, "loss": 1.2639, "step": 15061 }, { "epoch": 0.5394022955575053, "grad_norm": 1.570544958114624, "learning_rate": 9.210350219000416e-05, "loss": 1.6639, "step": 15062 }, { "epoch": 0.5394381076870736, "grad_norm": 1.4797194004058838, "learning_rate": 9.209193944939798e-05, "loss": 1.4074, "step": 15063 }, { "epoch": 0.539473919816642, "grad_norm": 1.966138243675232, "learning_rate": 9.208037681518454e-05, "loss": 1.7929, "step": 15064 }, { "epoch": 0.5395097319462102, "grad_norm": 1.6170921325683594, "learning_rate": 9.206881428751941e-05, "loss": 1.2353, "step": 15065 }, { "epoch": 0.5395455440757785, "grad_norm": 1.8537172079086304, "learning_rate": 9.205725186655817e-05, "loss": 1.466, "step": 15066 }, { "epoch": 0.5395813562053468, "grad_norm": 1.2693291902542114, "learning_rate": 9.204568955245634e-05, "loss": 1.4357, "step": 15067 }, { "epoch": 0.539617168334915, "grad_norm": 1.3635295629501343, "learning_rate": 9.203412734536951e-05, "loss": 1.4153, "step": 15068 }, { "epoch": 0.5396529804644833, "grad_norm": 1.594772219657898, "learning_rate": 9.202256524545322e-05, "loss": 1.3883, "step": 15069 }, { "epoch": 0.5396887925940516, "grad_norm": 1.3356691598892212, "learning_rate": 9.201100325286302e-05, "loss": 1.4835, "step": 15070 }, { "epoch": 0.5397246047236199, "grad_norm": 2.0734238624572754, "learning_rate": 9.199944136775446e-05, "loss": 1.5278, "step": 15071 }, { "epoch": 0.5397604168531882, "grad_norm": 1.533120036125183, "learning_rate": 9.198787959028312e-05, "loss": 1.536, "step": 15072 }, { "epoch": 0.5397962289827565, "grad_norm": 1.554632544517517, "learning_rate": 9.197631792060453e-05, "loss": 1.365, "step": 15073 }, { "epoch": 0.5398320411123247, "grad_norm": 1.5368367433547974, "learning_rate": 9.196475635887419e-05, "loss": 1.6686, "step": 15074 }, { "epoch": 0.539867853241893, "grad_norm": 1.6258572340011597, "learning_rate": 9.195319490524772e-05, "loss": 1.3955, "step": 15075 }, { "epoch": 0.5399036653714613, "grad_norm": 1.443794846534729, "learning_rate": 9.194163355988062e-05, "loss": 1.3147, "step": 15076 }, { "epoch": 0.5399394775010296, "grad_norm": 1.916318655014038, "learning_rate": 9.193007232292846e-05, "loss": 1.7055, "step": 15077 }, { "epoch": 0.5399752896305979, "grad_norm": 1.584694504737854, "learning_rate": 9.191851119454675e-05, "loss": 1.1801, "step": 15078 }, { "epoch": 0.5400111017601662, "grad_norm": 1.3532837629318237, "learning_rate": 9.190695017489106e-05, "loss": 1.3885, "step": 15079 }, { "epoch": 0.5400469138897345, "grad_norm": 2.9349710941314697, "learning_rate": 9.18953892641169e-05, "loss": 1.4769, "step": 15080 }, { "epoch": 0.5400827260193027, "grad_norm": 2.0163779258728027, "learning_rate": 9.188382846237984e-05, "loss": 1.4724, "step": 15081 }, { "epoch": 0.540118538148871, "grad_norm": 1.6981626749038696, "learning_rate": 9.187226776983543e-05, "loss": 1.676, "step": 15082 }, { "epoch": 0.5401543502784393, "grad_norm": 1.884413719177246, "learning_rate": 9.18607071866391e-05, "loss": 1.7854, "step": 15083 }, { "epoch": 0.5401901624080075, "grad_norm": 1.4751265048980713, "learning_rate": 9.184914671294653e-05, "loss": 1.3642, "step": 15084 }, { "epoch": 0.5402259745375759, "grad_norm": 1.6983938217163086, "learning_rate": 9.18375863489131e-05, "loss": 1.3347, "step": 15085 }, { "epoch": 0.5402617866671442, "grad_norm": 1.627390742301941, "learning_rate": 9.182602609469448e-05, "loss": 1.4281, "step": 15086 }, { "epoch": 0.5402975987967125, "grad_norm": 1.8508156538009644, "learning_rate": 9.18144659504461e-05, "loss": 1.2141, "step": 15087 }, { "epoch": 0.5403334109262807, "grad_norm": 1.926315188407898, "learning_rate": 9.180290591632354e-05, "loss": 1.5036, "step": 15088 }, { "epoch": 0.540369223055849, "grad_norm": 1.5384167432785034, "learning_rate": 9.179134599248228e-05, "loss": 1.3299, "step": 15089 }, { "epoch": 0.5404050351854173, "grad_norm": 1.4410181045532227, "learning_rate": 9.177978617907791e-05, "loss": 1.4095, "step": 15090 }, { "epoch": 0.5404408473149855, "grad_norm": 1.6018924713134766, "learning_rate": 9.176822647626593e-05, "loss": 1.627, "step": 15091 }, { "epoch": 0.5404766594445539, "grad_norm": 1.6720951795578003, "learning_rate": 9.175666688420177e-05, "loss": 1.5403, "step": 15092 }, { "epoch": 0.5405124715741222, "grad_norm": 1.8459980487823486, "learning_rate": 9.17451074030411e-05, "loss": 1.4794, "step": 15093 }, { "epoch": 0.5405482837036905, "grad_norm": 1.8089991807937622, "learning_rate": 9.17335480329393e-05, "loss": 1.6141, "step": 15094 }, { "epoch": 0.5405840958332587, "grad_norm": 1.9303104877471924, "learning_rate": 9.1721988774052e-05, "loss": 1.248, "step": 15095 }, { "epoch": 0.540619907962827, "grad_norm": 1.3829426765441895, "learning_rate": 9.17104296265346e-05, "loss": 1.2953, "step": 15096 }, { "epoch": 0.5406557200923953, "grad_norm": 1.5334969758987427, "learning_rate": 9.169887059054275e-05, "loss": 1.4165, "step": 15097 }, { "epoch": 0.5406915322219635, "grad_norm": 2.001737117767334, "learning_rate": 9.168731166623182e-05, "loss": 1.294, "step": 15098 }, { "epoch": 0.5407273443515319, "grad_norm": 3.4213063716888428, "learning_rate": 9.167575285375744e-05, "loss": 1.5401, "step": 15099 }, { "epoch": 0.5407631564811002, "grad_norm": 1.7544281482696533, "learning_rate": 9.166419415327508e-05, "loss": 1.4561, "step": 15100 }, { "epoch": 0.5407989686106685, "grad_norm": 1.7456077337265015, "learning_rate": 9.165263556494016e-05, "loss": 1.1195, "step": 15101 }, { "epoch": 0.5408347807402367, "grad_norm": 1.4219002723693848, "learning_rate": 9.164107708890835e-05, "loss": 1.1713, "step": 15102 }, { "epoch": 0.540870592869805, "grad_norm": 1.255926489830017, "learning_rate": 9.162951872533498e-05, "loss": 1.3403, "step": 15103 }, { "epoch": 0.5409064049993733, "grad_norm": 2.029594898223877, "learning_rate": 9.161796047437572e-05, "loss": 1.5172, "step": 15104 }, { "epoch": 0.5409422171289415, "grad_norm": 1.3873099088668823, "learning_rate": 9.160640233618591e-05, "loss": 1.314, "step": 15105 }, { "epoch": 0.5409780292585099, "grad_norm": 1.565388798713684, "learning_rate": 9.15948443109212e-05, "loss": 1.3721, "step": 15106 }, { "epoch": 0.5410138413880782, "grad_norm": 1.4900970458984375, "learning_rate": 9.158328639873695e-05, "loss": 1.3958, "step": 15107 }, { "epoch": 0.5410496535176464, "grad_norm": 1.506885051727295, "learning_rate": 9.15717285997888e-05, "loss": 1.3126, "step": 15108 }, { "epoch": 0.5410854656472147, "grad_norm": 1.5706779956817627, "learning_rate": 9.156017091423215e-05, "loss": 1.4397, "step": 15109 }, { "epoch": 0.541121277776783, "grad_norm": 1.3363198041915894, "learning_rate": 9.154861334222248e-05, "loss": 1.3809, "step": 15110 }, { "epoch": 0.5411570899063513, "grad_norm": 1.466255784034729, "learning_rate": 9.153705588391535e-05, "loss": 1.0968, "step": 15111 }, { "epoch": 0.5411929020359195, "grad_norm": 1.7009021043777466, "learning_rate": 9.152549853946615e-05, "loss": 1.1897, "step": 15112 }, { "epoch": 0.5412287141654879, "grad_norm": 1.6660683155059814, "learning_rate": 9.151394130903052e-05, "loss": 1.5026, "step": 15113 }, { "epoch": 0.5412645262950562, "grad_norm": 1.4293491840362549, "learning_rate": 9.15023841927638e-05, "loss": 1.7752, "step": 15114 }, { "epoch": 0.5413003384246244, "grad_norm": 1.6292088031768799, "learning_rate": 9.14908271908216e-05, "loss": 1.4279, "step": 15115 }, { "epoch": 0.5413361505541927, "grad_norm": 2.2113747596740723, "learning_rate": 9.147927030335928e-05, "loss": 1.5957, "step": 15116 }, { "epoch": 0.541371962683761, "grad_norm": 2.821272611618042, "learning_rate": 9.146771353053245e-05, "loss": 1.2521, "step": 15117 }, { "epoch": 0.5414077748133292, "grad_norm": 1.4644893407821655, "learning_rate": 9.14561568724965e-05, "loss": 1.1884, "step": 15118 }, { "epoch": 0.5414435869428975, "grad_norm": 1.7300174236297607, "learning_rate": 9.144460032940693e-05, "loss": 1.5423, "step": 15119 }, { "epoch": 0.5414793990724659, "grad_norm": 1.2341820001602173, "learning_rate": 9.143304390141925e-05, "loss": 1.0844, "step": 15120 }, { "epoch": 0.5415152112020342, "grad_norm": 1.7158368825912476, "learning_rate": 9.142148758868887e-05, "loss": 1.6006, "step": 15121 }, { "epoch": 0.5415510233316024, "grad_norm": 2.46889328956604, "learning_rate": 9.140993139137135e-05, "loss": 1.613, "step": 15122 }, { "epoch": 0.5415868354611707, "grad_norm": 1.745641827583313, "learning_rate": 9.139837530962209e-05, "loss": 1.3685, "step": 15123 }, { "epoch": 0.541622647590739, "grad_norm": 1.7009758949279785, "learning_rate": 9.138681934359663e-05, "loss": 1.5774, "step": 15124 }, { "epoch": 0.5416584597203072, "grad_norm": 2.010895252227783, "learning_rate": 9.137526349345036e-05, "loss": 1.421, "step": 15125 }, { "epoch": 0.5416942718498755, "grad_norm": 1.514216423034668, "learning_rate": 9.136370775933885e-05, "loss": 1.4046, "step": 15126 }, { "epoch": 0.5417300839794439, "grad_norm": 1.5812219381332397, "learning_rate": 9.135215214141751e-05, "loss": 1.4894, "step": 15127 }, { "epoch": 0.5417658961090122, "grad_norm": 1.7049273252487183, "learning_rate": 9.134059663984176e-05, "loss": 1.7633, "step": 15128 }, { "epoch": 0.5418017082385804, "grad_norm": 1.498794674873352, "learning_rate": 9.132904125476715e-05, "loss": 1.3923, "step": 15129 }, { "epoch": 0.5418375203681487, "grad_norm": 2.9248688220977783, "learning_rate": 9.131748598634907e-05, "loss": 1.9405, "step": 15130 }, { "epoch": 0.541873332497717, "grad_norm": 1.411406397819519, "learning_rate": 9.130593083474305e-05, "loss": 1.2329, "step": 15131 }, { "epoch": 0.5419091446272852, "grad_norm": 1.7705422639846802, "learning_rate": 9.129437580010449e-05, "loss": 1.3942, "step": 15132 }, { "epoch": 0.5419449567568535, "grad_norm": 2.8008735179901123, "learning_rate": 9.12828208825889e-05, "loss": 1.4574, "step": 15133 }, { "epoch": 0.5419807688864219, "grad_norm": 1.664994478225708, "learning_rate": 9.12712660823517e-05, "loss": 1.5583, "step": 15134 }, { "epoch": 0.5420165810159902, "grad_norm": 2.5544071197509766, "learning_rate": 9.125971139954835e-05, "loss": 1.6255, "step": 15135 }, { "epoch": 0.5420523931455584, "grad_norm": 1.645238995552063, "learning_rate": 9.124815683433432e-05, "loss": 1.2817, "step": 15136 }, { "epoch": 0.5420882052751267, "grad_norm": 1.7806026935577393, "learning_rate": 9.123660238686503e-05, "loss": 1.6258, "step": 15137 }, { "epoch": 0.542124017404695, "grad_norm": 1.9285032749176025, "learning_rate": 9.122504805729598e-05, "loss": 1.6613, "step": 15138 }, { "epoch": 0.5421598295342632, "grad_norm": 1.6295703649520874, "learning_rate": 9.121349384578255e-05, "loss": 1.3031, "step": 15139 }, { "epoch": 0.5421956416638315, "grad_norm": 1.7545650005340576, "learning_rate": 9.120193975248027e-05, "loss": 1.6079, "step": 15140 }, { "epoch": 0.5422314537933999, "grad_norm": 1.3660227060317993, "learning_rate": 9.119038577754451e-05, "loss": 1.2539, "step": 15141 }, { "epoch": 0.5422672659229681, "grad_norm": 1.3754740953445435, "learning_rate": 9.117883192113077e-05, "loss": 1.3293, "step": 15142 }, { "epoch": 0.5423030780525364, "grad_norm": 1.3937410116195679, "learning_rate": 9.116727818339444e-05, "loss": 1.6224, "step": 15143 }, { "epoch": 0.5423388901821047, "grad_norm": 2.4949188232421875, "learning_rate": 9.115572456449102e-05, "loss": 1.6398, "step": 15144 }, { "epoch": 0.542374702311673, "grad_norm": 1.8907426595687866, "learning_rate": 9.114417106457591e-05, "loss": 1.6803, "step": 15145 }, { "epoch": 0.5424105144412412, "grad_norm": 1.436632752418518, "learning_rate": 9.113261768380454e-05, "loss": 1.4615, "step": 15146 }, { "epoch": 0.5424463265708095, "grad_norm": 1.9328235387802124, "learning_rate": 9.112106442233237e-05, "loss": 1.47, "step": 15147 }, { "epoch": 0.5424821387003779, "grad_norm": 1.6240284442901611, "learning_rate": 9.110951128031482e-05, "loss": 1.2528, "step": 15148 }, { "epoch": 0.5425179508299461, "grad_norm": 1.9730523824691772, "learning_rate": 9.109795825790735e-05, "loss": 1.3464, "step": 15149 }, { "epoch": 0.5425537629595144, "grad_norm": 1.679447054862976, "learning_rate": 9.108640535526533e-05, "loss": 1.6473, "step": 15150 }, { "epoch": 0.5425895750890827, "grad_norm": 1.7408350706100464, "learning_rate": 9.107485257254426e-05, "loss": 1.5971, "step": 15151 }, { "epoch": 0.542625387218651, "grad_norm": 1.811719536781311, "learning_rate": 9.106329990989952e-05, "loss": 1.3407, "step": 15152 }, { "epoch": 0.5426611993482192, "grad_norm": 2.72822904586792, "learning_rate": 9.105174736748656e-05, "loss": 1.5943, "step": 15153 }, { "epoch": 0.5426970114777875, "grad_norm": 1.4611486196517944, "learning_rate": 9.104019494546081e-05, "loss": 1.3498, "step": 15154 }, { "epoch": 0.5427328236073559, "grad_norm": 1.499182105064392, "learning_rate": 9.102864264397765e-05, "loss": 1.4977, "step": 15155 }, { "epoch": 0.5427686357369241, "grad_norm": 1.542519211769104, "learning_rate": 9.101709046319256e-05, "loss": 1.1291, "step": 15156 }, { "epoch": 0.5428044478664924, "grad_norm": 1.6112427711486816, "learning_rate": 9.10055384032609e-05, "loss": 1.3433, "step": 15157 }, { "epoch": 0.5428402599960607, "grad_norm": 1.7602654695510864, "learning_rate": 9.099398646433814e-05, "loss": 1.6902, "step": 15158 }, { "epoch": 0.5428760721256289, "grad_norm": 1.9010089635849, "learning_rate": 9.098243464657966e-05, "loss": 1.7847, "step": 15159 }, { "epoch": 0.5429118842551972, "grad_norm": 2.192765235900879, "learning_rate": 9.097088295014092e-05, "loss": 1.4833, "step": 15160 }, { "epoch": 0.5429476963847655, "grad_norm": 2.360835313796997, "learning_rate": 9.095933137517727e-05, "loss": 1.9125, "step": 15161 }, { "epoch": 0.5429835085143339, "grad_norm": 1.5438072681427002, "learning_rate": 9.094777992184417e-05, "loss": 1.1242, "step": 15162 }, { "epoch": 0.5430193206439021, "grad_norm": 1.586403727531433, "learning_rate": 9.093622859029701e-05, "loss": 1.7543, "step": 15163 }, { "epoch": 0.5430551327734704, "grad_norm": 1.6280819177627563, "learning_rate": 9.09246773806912e-05, "loss": 1.2004, "step": 15164 }, { "epoch": 0.5430909449030387, "grad_norm": 2.1167192459106445, "learning_rate": 9.091312629318216e-05, "loss": 1.7255, "step": 15165 }, { "epoch": 0.5431267570326069, "grad_norm": 1.3767898082733154, "learning_rate": 9.090157532792526e-05, "loss": 1.4296, "step": 15166 }, { "epoch": 0.5431625691621752, "grad_norm": 1.5592973232269287, "learning_rate": 9.089002448507596e-05, "loss": 1.3892, "step": 15167 }, { "epoch": 0.5431983812917435, "grad_norm": 2.22155499458313, "learning_rate": 9.087847376478961e-05, "loss": 1.269, "step": 15168 }, { "epoch": 0.5432341934213119, "grad_norm": 1.561850905418396, "learning_rate": 9.086692316722166e-05, "loss": 1.3457, "step": 15169 }, { "epoch": 0.5432700055508801, "grad_norm": 1.6075046062469482, "learning_rate": 9.085537269252747e-05, "loss": 1.3777, "step": 15170 }, { "epoch": 0.5433058176804484, "grad_norm": 1.8987433910369873, "learning_rate": 9.08438223408624e-05, "loss": 1.2371, "step": 15171 }, { "epoch": 0.5433416298100167, "grad_norm": 1.4926401376724243, "learning_rate": 9.083227211238192e-05, "loss": 1.4493, "step": 15172 }, { "epoch": 0.5433774419395849, "grad_norm": 2.8346004486083984, "learning_rate": 9.082072200724139e-05, "loss": 1.5074, "step": 15173 }, { "epoch": 0.5434132540691532, "grad_norm": 1.5992660522460938, "learning_rate": 9.08091720255962e-05, "loss": 1.1084, "step": 15174 }, { "epoch": 0.5434490661987215, "grad_norm": 1.871173620223999, "learning_rate": 9.079762216760174e-05, "loss": 1.3968, "step": 15175 }, { "epoch": 0.5434848783282898, "grad_norm": 2.4197909832000732, "learning_rate": 9.078607243341344e-05, "loss": 1.8805, "step": 15176 }, { "epoch": 0.5435206904578581, "grad_norm": 1.300917387008667, "learning_rate": 9.077452282318661e-05, "loss": 1.3799, "step": 15177 }, { "epoch": 0.5435565025874264, "grad_norm": 1.500741720199585, "learning_rate": 9.076297333707669e-05, "loss": 1.3054, "step": 15178 }, { "epoch": 0.5435923147169947, "grad_norm": 1.7115274667739868, "learning_rate": 9.07514239752391e-05, "loss": 1.2259, "step": 15179 }, { "epoch": 0.5436281268465629, "grad_norm": 1.6599082946777344, "learning_rate": 9.073987473782907e-05, "loss": 1.7227, "step": 15180 }, { "epoch": 0.5436639389761312, "grad_norm": 1.701534390449524, "learning_rate": 9.072832562500217e-05, "loss": 1.5188, "step": 15181 }, { "epoch": 0.5436997511056995, "grad_norm": 1.621097445487976, "learning_rate": 9.071677663691361e-05, "loss": 1.3214, "step": 15182 }, { "epoch": 0.5437355632352678, "grad_norm": 1.8554396629333496, "learning_rate": 9.070522777371892e-05, "loss": 2.0134, "step": 15183 }, { "epoch": 0.5437713753648361, "grad_norm": 1.690596342086792, "learning_rate": 9.069367903557333e-05, "loss": 1.5976, "step": 15184 }, { "epoch": 0.5438071874944044, "grad_norm": 1.5468312501907349, "learning_rate": 9.068213042263234e-05, "loss": 1.3421, "step": 15185 }, { "epoch": 0.5438429996239726, "grad_norm": 1.7078195810317993, "learning_rate": 9.067058193505124e-05, "loss": 1.5431, "step": 15186 }, { "epoch": 0.5438788117535409, "grad_norm": 1.6992605924606323, "learning_rate": 9.065903357298544e-05, "loss": 1.4592, "step": 15187 }, { "epoch": 0.5439146238831092, "grad_norm": 1.696571707725525, "learning_rate": 9.064748533659031e-05, "loss": 1.644, "step": 15188 }, { "epoch": 0.5439504360126775, "grad_norm": 1.737825632095337, "learning_rate": 9.063593722602115e-05, "loss": 1.2788, "step": 15189 }, { "epoch": 0.5439862481422458, "grad_norm": 2.366448402404785, "learning_rate": 9.062438924143344e-05, "loss": 1.4315, "step": 15190 }, { "epoch": 0.5440220602718141, "grad_norm": 2.0418972969055176, "learning_rate": 9.06128413829824e-05, "loss": 1.4927, "step": 15191 }, { "epoch": 0.5440578724013824, "grad_norm": 2.0547597408294678, "learning_rate": 9.060129365082354e-05, "loss": 1.5544, "step": 15192 }, { "epoch": 0.5440936845309506, "grad_norm": 2.0122506618499756, "learning_rate": 9.05897460451121e-05, "loss": 1.5898, "step": 15193 }, { "epoch": 0.5441294966605189, "grad_norm": 2.682504177093506, "learning_rate": 9.057819856600355e-05, "loss": 1.5509, "step": 15194 }, { "epoch": 0.5441653087900872, "grad_norm": 2.28589129447937, "learning_rate": 9.056665121365311e-05, "loss": 1.5996, "step": 15195 }, { "epoch": 0.5442011209196554, "grad_norm": 1.6263935565948486, "learning_rate": 9.055510398821627e-05, "loss": 1.2338, "step": 15196 }, { "epoch": 0.5442369330492238, "grad_norm": 1.4128479957580566, "learning_rate": 9.054355688984833e-05, "loss": 1.6509, "step": 15197 }, { "epoch": 0.5442727451787921, "grad_norm": 1.4908677339553833, "learning_rate": 9.053200991870456e-05, "loss": 1.0973, "step": 15198 }, { "epoch": 0.5443085573083604, "grad_norm": 2.3797690868377686, "learning_rate": 9.052046307494046e-05, "loss": 1.4909, "step": 15199 }, { "epoch": 0.5443443694379286, "grad_norm": 1.5624659061431885, "learning_rate": 9.050891635871124e-05, "loss": 1.1965, "step": 15200 }, { "epoch": 0.5443801815674969, "grad_norm": 1.94329833984375, "learning_rate": 9.049736977017236e-05, "loss": 1.661, "step": 15201 }, { "epoch": 0.5444159936970652, "grad_norm": 1.6274837255477905, "learning_rate": 9.048582330947906e-05, "loss": 1.4358, "step": 15202 }, { "epoch": 0.5444518058266334, "grad_norm": 2.1027121543884277, "learning_rate": 9.04742769767868e-05, "loss": 1.8225, "step": 15203 }, { "epoch": 0.5444876179562018, "grad_norm": 1.6133577823638916, "learning_rate": 9.046273077225078e-05, "loss": 1.5472, "step": 15204 }, { "epoch": 0.5445234300857701, "grad_norm": 1.9349349737167358, "learning_rate": 9.045118469602649e-05, "loss": 1.4636, "step": 15205 }, { "epoch": 0.5445592422153384, "grad_norm": 1.7289633750915527, "learning_rate": 9.043963874826917e-05, "loss": 1.6001, "step": 15206 }, { "epoch": 0.5445950543449066, "grad_norm": 1.6920100450515747, "learning_rate": 9.042809292913415e-05, "loss": 1.3169, "step": 15207 }, { "epoch": 0.5446308664744749, "grad_norm": 1.8096444606781006, "learning_rate": 9.041654723877683e-05, "loss": 1.2999, "step": 15208 }, { "epoch": 0.5446666786040432, "grad_norm": 1.588729977607727, "learning_rate": 9.040500167735247e-05, "loss": 1.7168, "step": 15209 }, { "epoch": 0.5447024907336114, "grad_norm": 1.802148699760437, "learning_rate": 9.039345624501646e-05, "loss": 1.3229, "step": 15210 }, { "epoch": 0.5447383028631798, "grad_norm": 1.531667947769165, "learning_rate": 9.038191094192407e-05, "loss": 1.3616, "step": 15211 }, { "epoch": 0.5447741149927481, "grad_norm": 1.4396610260009766, "learning_rate": 9.037036576823072e-05, "loss": 0.9985, "step": 15212 }, { "epoch": 0.5448099271223164, "grad_norm": 1.5046268701553345, "learning_rate": 9.035882072409161e-05, "loss": 1.5455, "step": 15213 }, { "epoch": 0.5448457392518846, "grad_norm": 2.3657383918762207, "learning_rate": 9.034727580966219e-05, "loss": 1.8284, "step": 15214 }, { "epoch": 0.5448815513814529, "grad_norm": 1.6111987829208374, "learning_rate": 9.033573102509771e-05, "loss": 1.4184, "step": 15215 }, { "epoch": 0.5449173635110212, "grad_norm": 1.466270923614502, "learning_rate": 9.032418637055348e-05, "loss": 1.4768, "step": 15216 }, { "epoch": 0.5449531756405894, "grad_norm": 1.4635989665985107, "learning_rate": 9.031264184618487e-05, "loss": 1.4865, "step": 15217 }, { "epoch": 0.5449889877701578, "grad_norm": 2.0012905597686768, "learning_rate": 9.030109745214713e-05, "loss": 1.5261, "step": 15218 }, { "epoch": 0.5450247998997261, "grad_norm": 1.6931449174880981, "learning_rate": 9.028955318859564e-05, "loss": 1.3085, "step": 15219 }, { "epoch": 0.5450606120292943, "grad_norm": 1.5689709186553955, "learning_rate": 9.027800905568568e-05, "loss": 1.5905, "step": 15220 }, { "epoch": 0.5450964241588626, "grad_norm": 1.997705101966858, "learning_rate": 9.026646505357258e-05, "loss": 1.835, "step": 15221 }, { "epoch": 0.5451322362884309, "grad_norm": 1.988075852394104, "learning_rate": 9.025492118241161e-05, "loss": 1.92, "step": 15222 }, { "epoch": 0.5451680484179992, "grad_norm": 1.7597702741622925, "learning_rate": 9.024337744235814e-05, "loss": 1.6645, "step": 15223 }, { "epoch": 0.5452038605475674, "grad_norm": 1.6029980182647705, "learning_rate": 9.023183383356743e-05, "loss": 1.5673, "step": 15224 }, { "epoch": 0.5452396726771358, "grad_norm": 1.7730108499526978, "learning_rate": 9.022029035619478e-05, "loss": 1.4413, "step": 15225 }, { "epoch": 0.5452754848067041, "grad_norm": 1.6495401859283447, "learning_rate": 9.020874701039552e-05, "loss": 1.5705, "step": 15226 }, { "epoch": 0.5453112969362723, "grad_norm": 2.009526252746582, "learning_rate": 9.019720379632493e-05, "loss": 1.4947, "step": 15227 }, { "epoch": 0.5453471090658406, "grad_norm": 1.3570290803909302, "learning_rate": 9.018566071413833e-05, "loss": 1.4492, "step": 15228 }, { "epoch": 0.5453829211954089, "grad_norm": 1.9762985706329346, "learning_rate": 9.017411776399099e-05, "loss": 1.8505, "step": 15229 }, { "epoch": 0.5454187333249771, "grad_norm": 1.482008695602417, "learning_rate": 9.016257494603824e-05, "loss": 1.0744, "step": 15230 }, { "epoch": 0.5454545454545454, "grad_norm": 1.898165225982666, "learning_rate": 9.015103226043533e-05, "loss": 1.5189, "step": 15231 }, { "epoch": 0.5454903575841138, "grad_norm": 1.4939240217208862, "learning_rate": 9.01394897073376e-05, "loss": 1.5322, "step": 15232 }, { "epoch": 0.5455261697136821, "grad_norm": 1.3671813011169434, "learning_rate": 9.012794728690032e-05, "loss": 1.5075, "step": 15233 }, { "epoch": 0.5455619818432503, "grad_norm": 1.485896348953247, "learning_rate": 9.011640499927875e-05, "loss": 1.589, "step": 15234 }, { "epoch": 0.5455977939728186, "grad_norm": 1.7984567880630493, "learning_rate": 9.010486284462823e-05, "loss": 1.4442, "step": 15235 }, { "epoch": 0.5456336061023869, "grad_norm": 1.7843817472457886, "learning_rate": 9.009332082310398e-05, "loss": 1.3122, "step": 15236 }, { "epoch": 0.5456694182319551, "grad_norm": 1.760123610496521, "learning_rate": 9.008177893486136e-05, "loss": 1.5041, "step": 15237 }, { "epoch": 0.5457052303615234, "grad_norm": 1.4291465282440186, "learning_rate": 9.007023718005558e-05, "loss": 1.6004, "step": 15238 }, { "epoch": 0.5457410424910918, "grad_norm": 2.074390172958374, "learning_rate": 9.005869555884197e-05, "loss": 1.4731, "step": 15239 }, { "epoch": 0.5457768546206601, "grad_norm": 1.5591212511062622, "learning_rate": 9.004715407137577e-05, "loss": 1.4422, "step": 15240 }, { "epoch": 0.5458126667502283, "grad_norm": 1.5369257926940918, "learning_rate": 9.003561271781229e-05, "loss": 1.4392, "step": 15241 }, { "epoch": 0.5458484788797966, "grad_norm": 1.3781731128692627, "learning_rate": 9.002407149830679e-05, "loss": 1.5801, "step": 15242 }, { "epoch": 0.5458842910093649, "grad_norm": 1.7291080951690674, "learning_rate": 9.001253041301453e-05, "loss": 1.6995, "step": 15243 }, { "epoch": 0.5459201031389331, "grad_norm": 1.5849418640136719, "learning_rate": 9.00009894620908e-05, "loss": 1.5973, "step": 15244 }, { "epoch": 0.5459559152685014, "grad_norm": 1.6605151891708374, "learning_rate": 8.998944864569084e-05, "loss": 1.6434, "step": 15245 }, { "epoch": 0.5459917273980698, "grad_norm": 1.9970816373825073, "learning_rate": 8.997790796396996e-05, "loss": 1.8821, "step": 15246 }, { "epoch": 0.546027539527638, "grad_norm": 1.8191704750061035, "learning_rate": 8.996636741708337e-05, "loss": 1.7131, "step": 15247 }, { "epoch": 0.5460633516572063, "grad_norm": 1.318717122077942, "learning_rate": 8.995482700518639e-05, "loss": 1.3839, "step": 15248 }, { "epoch": 0.5460991637867746, "grad_norm": 1.5386637449264526, "learning_rate": 8.994328672843424e-05, "loss": 1.74, "step": 15249 }, { "epoch": 0.5461349759163429, "grad_norm": 1.6829789876937866, "learning_rate": 8.993174658698221e-05, "loss": 1.3832, "step": 15250 }, { "epoch": 0.5461707880459111, "grad_norm": 2.5032663345336914, "learning_rate": 8.992020658098555e-05, "loss": 1.4875, "step": 15251 }, { "epoch": 0.5462066001754794, "grad_norm": 2.0662424564361572, "learning_rate": 8.990866671059948e-05, "loss": 1.5559, "step": 15252 }, { "epoch": 0.5462424123050477, "grad_norm": 1.9819056987762451, "learning_rate": 8.98971269759793e-05, "loss": 1.7462, "step": 15253 }, { "epoch": 0.546278224434616, "grad_norm": 1.755515456199646, "learning_rate": 8.988558737728023e-05, "loss": 1.4854, "step": 15254 }, { "epoch": 0.5463140365641843, "grad_norm": 1.8276987075805664, "learning_rate": 8.987404791465757e-05, "loss": 1.5511, "step": 15255 }, { "epoch": 0.5463498486937526, "grad_norm": 1.7192363739013672, "learning_rate": 8.986250858826649e-05, "loss": 1.7131, "step": 15256 }, { "epoch": 0.5463856608233209, "grad_norm": 1.8038772344589233, "learning_rate": 8.985096939826231e-05, "loss": 1.4206, "step": 15257 }, { "epoch": 0.5464214729528891, "grad_norm": 2.844478130340576, "learning_rate": 8.983943034480022e-05, "loss": 1.8703, "step": 15258 }, { "epoch": 0.5464572850824574, "grad_norm": 1.6079673767089844, "learning_rate": 8.982789142803552e-05, "loss": 1.0447, "step": 15259 }, { "epoch": 0.5464930972120257, "grad_norm": 1.8626993894577026, "learning_rate": 8.981635264812341e-05, "loss": 1.5425, "step": 15260 }, { "epoch": 0.546528909341594, "grad_norm": 1.9732133150100708, "learning_rate": 8.980481400521911e-05, "loss": 1.7118, "step": 15261 }, { "epoch": 0.5465647214711623, "grad_norm": 1.683758020401001, "learning_rate": 8.979327549947794e-05, "loss": 1.7804, "step": 15262 }, { "epoch": 0.5466005336007306, "grad_norm": 2.286837339401245, "learning_rate": 8.978173713105503e-05, "loss": 1.6872, "step": 15263 }, { "epoch": 0.5466363457302988, "grad_norm": 2.094520330429077, "learning_rate": 8.977019890010571e-05, "loss": 1.5192, "step": 15264 }, { "epoch": 0.5466721578598671, "grad_norm": 1.6780959367752075, "learning_rate": 8.975866080678512e-05, "loss": 1.451, "step": 15265 }, { "epoch": 0.5467079699894354, "grad_norm": 1.653809905052185, "learning_rate": 8.974712285124858e-05, "loss": 1.4063, "step": 15266 }, { "epoch": 0.5467437821190037, "grad_norm": 1.4220283031463623, "learning_rate": 8.973558503365129e-05, "loss": 1.5108, "step": 15267 }, { "epoch": 0.546779594248572, "grad_norm": 2.0603954792022705, "learning_rate": 8.97240473541484e-05, "loss": 1.6627, "step": 15268 }, { "epoch": 0.5468154063781403, "grad_norm": 1.338783621788025, "learning_rate": 8.971250981289525e-05, "loss": 1.4841, "step": 15269 }, { "epoch": 0.5468512185077086, "grad_norm": 2.398456573486328, "learning_rate": 8.970097241004697e-05, "loss": 1.5663, "step": 15270 }, { "epoch": 0.5468870306372768, "grad_norm": 1.6194860935211182, "learning_rate": 8.968943514575888e-05, "loss": 1.6908, "step": 15271 }, { "epoch": 0.5469228427668451, "grad_norm": 1.4247815608978271, "learning_rate": 8.967789802018607e-05, "loss": 1.4542, "step": 15272 }, { "epoch": 0.5469586548964134, "grad_norm": 1.889357566833496, "learning_rate": 8.966636103348388e-05, "loss": 1.7865, "step": 15273 }, { "epoch": 0.5469944670259816, "grad_norm": 1.6137316226959229, "learning_rate": 8.965482418580746e-05, "loss": 1.8113, "step": 15274 }, { "epoch": 0.54703027915555, "grad_norm": 1.8220938444137573, "learning_rate": 8.964328747731204e-05, "loss": 1.4244, "step": 15275 }, { "epoch": 0.5470660912851183, "grad_norm": 1.8542633056640625, "learning_rate": 8.963175090815285e-05, "loss": 1.32, "step": 15276 }, { "epoch": 0.5471019034146866, "grad_norm": 1.4733332395553589, "learning_rate": 8.9620214478485e-05, "loss": 1.2989, "step": 15277 }, { "epoch": 0.5471377155442548, "grad_norm": 1.4526832103729248, "learning_rate": 8.960867818846386e-05, "loss": 1.1323, "step": 15278 }, { "epoch": 0.5471735276738231, "grad_norm": 1.3265506029129028, "learning_rate": 8.959714203824449e-05, "loss": 1.397, "step": 15279 }, { "epoch": 0.5472093398033914, "grad_norm": 2.6252152919769287, "learning_rate": 8.958560602798221e-05, "loss": 1.7199, "step": 15280 }, { "epoch": 0.5472451519329596, "grad_norm": 1.9414409399032593, "learning_rate": 8.95740701578321e-05, "loss": 1.5143, "step": 15281 }, { "epoch": 0.547280964062528, "grad_norm": 1.725482702255249, "learning_rate": 8.956253442794948e-05, "loss": 1.1287, "step": 15282 }, { "epoch": 0.5473167761920963, "grad_norm": 1.5736626386642456, "learning_rate": 8.955099883848945e-05, "loss": 1.4276, "step": 15283 }, { "epoch": 0.5473525883216646, "grad_norm": 1.4038923978805542, "learning_rate": 8.953946338960731e-05, "loss": 1.2686, "step": 15284 }, { "epoch": 0.5473884004512328, "grad_norm": 1.9812275171279907, "learning_rate": 8.952792808145819e-05, "loss": 1.5779, "step": 15285 }, { "epoch": 0.5474242125808011, "grad_norm": 1.4937734603881836, "learning_rate": 8.951639291419723e-05, "loss": 1.4877, "step": 15286 }, { "epoch": 0.5474600247103694, "grad_norm": 1.4445714950561523, "learning_rate": 8.950485788797976e-05, "loss": 1.6559, "step": 15287 }, { "epoch": 0.5474958368399376, "grad_norm": 1.952468752861023, "learning_rate": 8.949332300296082e-05, "loss": 1.5121, "step": 15288 }, { "epoch": 0.547531648969506, "grad_norm": 1.7824208736419678, "learning_rate": 8.948178825929572e-05, "loss": 1.3028, "step": 15289 }, { "epoch": 0.5475674610990743, "grad_norm": 1.73703134059906, "learning_rate": 8.947025365713953e-05, "loss": 1.5888, "step": 15290 }, { "epoch": 0.5476032732286426, "grad_norm": 1.6486480236053467, "learning_rate": 8.945871919664757e-05, "loss": 1.1964, "step": 15291 }, { "epoch": 0.5476390853582108, "grad_norm": 1.4871211051940918, "learning_rate": 8.944718487797487e-05, "loss": 1.2785, "step": 15292 }, { "epoch": 0.5476748974877791, "grad_norm": 1.483238697052002, "learning_rate": 8.943565070127676e-05, "loss": 1.6927, "step": 15293 }, { "epoch": 0.5477107096173474, "grad_norm": 1.6315890550613403, "learning_rate": 8.94241166667083e-05, "loss": 1.4549, "step": 15294 }, { "epoch": 0.5477465217469156, "grad_norm": 2.340033769607544, "learning_rate": 8.94125827744247e-05, "loss": 1.6152, "step": 15295 }, { "epoch": 0.547782333876484, "grad_norm": 1.4662576913833618, "learning_rate": 8.940104902458117e-05, "loss": 1.6437, "step": 15296 }, { "epoch": 0.5478181460060523, "grad_norm": 1.7731101512908936, "learning_rate": 8.938951541733282e-05, "loss": 1.377, "step": 15297 }, { "epoch": 0.5478539581356205, "grad_norm": 1.815285563468933, "learning_rate": 8.93779819528349e-05, "loss": 1.4197, "step": 15298 }, { "epoch": 0.5478897702651888, "grad_norm": 2.2570080757141113, "learning_rate": 8.936644863124246e-05, "loss": 1.6707, "step": 15299 }, { "epoch": 0.5479255823947571, "grad_norm": 1.9335801601409912, "learning_rate": 8.935491545271081e-05, "loss": 1.5724, "step": 15300 }, { "epoch": 0.5479613945243254, "grad_norm": 1.3450572490692139, "learning_rate": 8.934338241739498e-05, "loss": 0.9793, "step": 15301 }, { "epoch": 0.5479972066538936, "grad_norm": 2.0288915634155273, "learning_rate": 8.933184952545026e-05, "loss": 1.5523, "step": 15302 }, { "epoch": 0.548033018783462, "grad_norm": 1.486048936843872, "learning_rate": 8.932031677703172e-05, "loss": 1.1013, "step": 15303 }, { "epoch": 0.5480688309130303, "grad_norm": 1.9262478351593018, "learning_rate": 8.930878417229453e-05, "loss": 1.5825, "step": 15304 }, { "epoch": 0.5481046430425985, "grad_norm": 2.0612335205078125, "learning_rate": 8.929725171139387e-05, "loss": 1.3004, "step": 15305 }, { "epoch": 0.5481404551721668, "grad_norm": 2.523592948913574, "learning_rate": 8.928571939448486e-05, "loss": 1.8698, "step": 15306 }, { "epoch": 0.5481762673017351, "grad_norm": 1.6707018613815308, "learning_rate": 8.927418722172269e-05, "loss": 1.5277, "step": 15307 }, { "epoch": 0.5482120794313033, "grad_norm": 1.4799731969833374, "learning_rate": 8.926265519326246e-05, "loss": 1.1901, "step": 15308 }, { "epoch": 0.5482478915608716, "grad_norm": 1.638659119606018, "learning_rate": 8.925112330925943e-05, "loss": 1.4529, "step": 15309 }, { "epoch": 0.54828370369044, "grad_norm": 1.3840322494506836, "learning_rate": 8.923959156986859e-05, "loss": 1.625, "step": 15310 }, { "epoch": 0.5483195158200083, "grad_norm": 1.525613784790039, "learning_rate": 8.922805997524524e-05, "loss": 1.5994, "step": 15311 }, { "epoch": 0.5483553279495765, "grad_norm": 2.075780153274536, "learning_rate": 8.921652852554442e-05, "loss": 1.9281, "step": 15312 }, { "epoch": 0.5483911400791448, "grad_norm": 1.4483722448349, "learning_rate": 8.920499722092129e-05, "loss": 1.3819, "step": 15313 }, { "epoch": 0.5484269522087131, "grad_norm": 2.663997173309326, "learning_rate": 8.9193466061531e-05, "loss": 1.02, "step": 15314 }, { "epoch": 0.5484627643382813, "grad_norm": 1.449901819229126, "learning_rate": 8.918193504752868e-05, "loss": 1.4184, "step": 15315 }, { "epoch": 0.5484985764678496, "grad_norm": 1.5887750387191772, "learning_rate": 8.917040417906947e-05, "loss": 1.4567, "step": 15316 }, { "epoch": 0.548534388597418, "grad_norm": 1.6006042957305908, "learning_rate": 8.91588734563085e-05, "loss": 1.5301, "step": 15317 }, { "epoch": 0.5485702007269863, "grad_norm": 1.9737050533294678, "learning_rate": 8.914734287940092e-05, "loss": 1.5652, "step": 15318 }, { "epoch": 0.5486060128565545, "grad_norm": 1.3489166498184204, "learning_rate": 8.913581244850182e-05, "loss": 1.2951, "step": 15319 }, { "epoch": 0.5486418249861228, "grad_norm": 1.744086503982544, "learning_rate": 8.912428216376637e-05, "loss": 1.4222, "step": 15320 }, { "epoch": 0.5486776371156911, "grad_norm": 1.5901646614074707, "learning_rate": 8.911275202534968e-05, "loss": 1.3965, "step": 15321 }, { "epoch": 0.5487134492452593, "grad_norm": 2.381009578704834, "learning_rate": 8.910122203340684e-05, "loss": 1.7014, "step": 15322 }, { "epoch": 0.5487492613748276, "grad_norm": 1.773105263710022, "learning_rate": 8.908969218809302e-05, "loss": 1.2225, "step": 15323 }, { "epoch": 0.548785073504396, "grad_norm": 1.7156941890716553, "learning_rate": 8.907816248956331e-05, "loss": 1.5767, "step": 15324 }, { "epoch": 0.5488208856339643, "grad_norm": 2.0456743240356445, "learning_rate": 8.906663293797284e-05, "loss": 1.7365, "step": 15325 }, { "epoch": 0.5488566977635325, "grad_norm": 1.5815179347991943, "learning_rate": 8.905510353347671e-05, "loss": 1.5657, "step": 15326 }, { "epoch": 0.5488925098931008, "grad_norm": 1.702483892440796, "learning_rate": 8.904357427623007e-05, "loss": 1.6139, "step": 15327 }, { "epoch": 0.5489283220226691, "grad_norm": 1.5035403966903687, "learning_rate": 8.903204516638796e-05, "loss": 1.515, "step": 15328 }, { "epoch": 0.5489641341522373, "grad_norm": 1.8640391826629639, "learning_rate": 8.902051620410558e-05, "loss": 1.5362, "step": 15329 }, { "epoch": 0.5489999462818056, "grad_norm": 1.583310842514038, "learning_rate": 8.9008987389538e-05, "loss": 1.5499, "step": 15330 }, { "epoch": 0.549035758411374, "grad_norm": 1.6002247333526611, "learning_rate": 8.899745872284026e-05, "loss": 1.1217, "step": 15331 }, { "epoch": 0.5490715705409422, "grad_norm": 1.4390969276428223, "learning_rate": 8.898593020416756e-05, "loss": 1.6414, "step": 15332 }, { "epoch": 0.5491073826705105, "grad_norm": 1.7034553289413452, "learning_rate": 8.897440183367496e-05, "loss": 1.549, "step": 15333 }, { "epoch": 0.5491431948000788, "grad_norm": 1.4598033428192139, "learning_rate": 8.896287361151757e-05, "loss": 1.4662, "step": 15334 }, { "epoch": 0.549179006929647, "grad_norm": 2.3608148097991943, "learning_rate": 8.895134553785044e-05, "loss": 1.5284, "step": 15335 }, { "epoch": 0.5492148190592153, "grad_norm": 1.7964868545532227, "learning_rate": 8.893981761282874e-05, "loss": 1.5421, "step": 15336 }, { "epoch": 0.5492506311887836, "grad_norm": 1.50372314453125, "learning_rate": 8.89282898366075e-05, "loss": 1.2766, "step": 15337 }, { "epoch": 0.549286443318352, "grad_norm": 1.470895767211914, "learning_rate": 8.891676220934188e-05, "loss": 1.2957, "step": 15338 }, { "epoch": 0.5493222554479202, "grad_norm": 1.9627442359924316, "learning_rate": 8.89052347311869e-05, "loss": 1.5351, "step": 15339 }, { "epoch": 0.5493580675774885, "grad_norm": 2.062045097351074, "learning_rate": 8.889370740229767e-05, "loss": 1.4394, "step": 15340 }, { "epoch": 0.5493938797070568, "grad_norm": 1.9214800596237183, "learning_rate": 8.88821802228293e-05, "loss": 1.3882, "step": 15341 }, { "epoch": 0.549429691836625, "grad_norm": 1.5839799642562866, "learning_rate": 8.887065319293684e-05, "loss": 1.427, "step": 15342 }, { "epoch": 0.5494655039661933, "grad_norm": 1.8803417682647705, "learning_rate": 8.88591263127754e-05, "loss": 1.4825, "step": 15343 }, { "epoch": 0.5495013160957616, "grad_norm": 1.5140496492385864, "learning_rate": 8.884759958250002e-05, "loss": 1.4992, "step": 15344 }, { "epoch": 0.54953712822533, "grad_norm": 1.6109691858291626, "learning_rate": 8.883607300226581e-05, "loss": 1.3751, "step": 15345 }, { "epoch": 0.5495729403548982, "grad_norm": 2.227198600769043, "learning_rate": 8.882454657222784e-05, "loss": 1.6037, "step": 15346 }, { "epoch": 0.5496087524844665, "grad_norm": 1.5396227836608887, "learning_rate": 8.88130202925412e-05, "loss": 1.4925, "step": 15347 }, { "epoch": 0.5496445646140348, "grad_norm": 2.18083119392395, "learning_rate": 8.880149416336093e-05, "loss": 1.7599, "step": 15348 }, { "epoch": 0.549680376743603, "grad_norm": 1.8912416696548462, "learning_rate": 8.878996818484209e-05, "loss": 1.5699, "step": 15349 }, { "epoch": 0.5497161888731713, "grad_norm": 1.3284300565719604, "learning_rate": 8.87784423571398e-05, "loss": 1.4352, "step": 15350 }, { "epoch": 0.5497520010027396, "grad_norm": 1.830657720565796, "learning_rate": 8.876691668040907e-05, "loss": 1.5481, "step": 15351 }, { "epoch": 0.549787813132308, "grad_norm": 2.0324718952178955, "learning_rate": 8.8755391154805e-05, "loss": 1.5041, "step": 15352 }, { "epoch": 0.5498236252618762, "grad_norm": 1.8235435485839844, "learning_rate": 8.874386578048261e-05, "loss": 1.2626, "step": 15353 }, { "epoch": 0.5498594373914445, "grad_norm": 1.490576148033142, "learning_rate": 8.873234055759703e-05, "loss": 1.5272, "step": 15354 }, { "epoch": 0.5498952495210128, "grad_norm": 1.8105982542037964, "learning_rate": 8.872081548630325e-05, "loss": 1.4809, "step": 15355 }, { "epoch": 0.549931061650581, "grad_norm": 1.976482629776001, "learning_rate": 8.870929056675636e-05, "loss": 1.597, "step": 15356 }, { "epoch": 0.5499668737801493, "grad_norm": 1.8228706121444702, "learning_rate": 8.86977657991114e-05, "loss": 1.7323, "step": 15357 }, { "epoch": 0.5500026859097176, "grad_norm": 1.821420669555664, "learning_rate": 8.86862411835234e-05, "loss": 1.3638, "step": 15358 }, { "epoch": 0.550038498039286, "grad_norm": 1.6070480346679688, "learning_rate": 8.867471672014745e-05, "loss": 1.3667, "step": 15359 }, { "epoch": 0.5500743101688542, "grad_norm": 1.3694812059402466, "learning_rate": 8.866319240913856e-05, "loss": 1.2689, "step": 15360 }, { "epoch": 0.5501101222984225, "grad_norm": 1.4904698133468628, "learning_rate": 8.865166825065182e-05, "loss": 1.3611, "step": 15361 }, { "epoch": 0.5501459344279908, "grad_norm": 1.2036080360412598, "learning_rate": 8.864014424484222e-05, "loss": 1.2641, "step": 15362 }, { "epoch": 0.550181746557559, "grad_norm": 1.5762604475021362, "learning_rate": 8.862862039186485e-05, "loss": 1.6896, "step": 15363 }, { "epoch": 0.5502175586871273, "grad_norm": 2.1189725399017334, "learning_rate": 8.861709669187474e-05, "loss": 1.4898, "step": 15364 }, { "epoch": 0.5502533708166956, "grad_norm": 1.6944526433944702, "learning_rate": 8.860557314502685e-05, "loss": 1.5987, "step": 15365 }, { "epoch": 0.5502891829462639, "grad_norm": 1.749204158782959, "learning_rate": 8.859404975147632e-05, "loss": 1.3471, "step": 15366 }, { "epoch": 0.5503249950758322, "grad_norm": 1.700831651687622, "learning_rate": 8.858252651137812e-05, "loss": 1.526, "step": 15367 }, { "epoch": 0.5503608072054005, "grad_norm": 1.3145804405212402, "learning_rate": 8.857100342488732e-05, "loss": 1.4017, "step": 15368 }, { "epoch": 0.5503966193349688, "grad_norm": 1.3753552436828613, "learning_rate": 8.855948049215888e-05, "loss": 1.5874, "step": 15369 }, { "epoch": 0.550432431464537, "grad_norm": 1.5876967906951904, "learning_rate": 8.854795771334794e-05, "loss": 1.537, "step": 15370 }, { "epoch": 0.5504682435941053, "grad_norm": 1.6385436058044434, "learning_rate": 8.85364350886094e-05, "loss": 1.3111, "step": 15371 }, { "epoch": 0.5505040557236736, "grad_norm": 2.4700660705566406, "learning_rate": 8.852491261809837e-05, "loss": 1.7414, "step": 15372 }, { "epoch": 0.5505398678532419, "grad_norm": 1.4579800367355347, "learning_rate": 8.851339030196986e-05, "loss": 1.4192, "step": 15373 }, { "epoch": 0.5505756799828102, "grad_norm": 1.7124450206756592, "learning_rate": 8.85018681403788e-05, "loss": 1.2621, "step": 15374 }, { "epoch": 0.5506114921123785, "grad_norm": 1.5826530456542969, "learning_rate": 8.849034613348035e-05, "loss": 1.3615, "step": 15375 }, { "epoch": 0.5506473042419467, "grad_norm": 1.3330745697021484, "learning_rate": 8.847882428142936e-05, "loss": 1.3821, "step": 15376 }, { "epoch": 0.550683116371515, "grad_norm": 1.582822322845459, "learning_rate": 8.8467302584381e-05, "loss": 1.4889, "step": 15377 }, { "epoch": 0.5507189285010833, "grad_norm": 1.2767612934112549, "learning_rate": 8.845578104249014e-05, "loss": 1.3505, "step": 15378 }, { "epoch": 0.5507547406306516, "grad_norm": 1.4788082838058472, "learning_rate": 8.844425965591192e-05, "loss": 1.5282, "step": 15379 }, { "epoch": 0.5507905527602199, "grad_norm": 1.8105095624923706, "learning_rate": 8.843273842480124e-05, "loss": 1.425, "step": 15380 }, { "epoch": 0.5508263648897882, "grad_norm": 1.4543743133544922, "learning_rate": 8.842121734931316e-05, "loss": 1.6334, "step": 15381 }, { "epoch": 0.5508621770193565, "grad_norm": 1.4791871309280396, "learning_rate": 8.840969642960271e-05, "loss": 1.3785, "step": 15382 }, { "epoch": 0.5508979891489247, "grad_norm": 1.55195152759552, "learning_rate": 8.839817566582477e-05, "loss": 1.3623, "step": 15383 }, { "epoch": 0.550933801278493, "grad_norm": 1.8789154291152954, "learning_rate": 8.838665505813448e-05, "loss": 1.3948, "step": 15384 }, { "epoch": 0.5509696134080613, "grad_norm": 1.813826560974121, "learning_rate": 8.837513460668668e-05, "loss": 1.4754, "step": 15385 }, { "epoch": 0.5510054255376295, "grad_norm": 1.9964311122894287, "learning_rate": 8.836361431163653e-05, "loss": 1.4923, "step": 15386 }, { "epoch": 0.5510412376671979, "grad_norm": 1.5974700450897217, "learning_rate": 8.835209417313886e-05, "loss": 1.6589, "step": 15387 }, { "epoch": 0.5510770497967662, "grad_norm": 1.9215779304504395, "learning_rate": 8.834057419134883e-05, "loss": 1.7583, "step": 15388 }, { "epoch": 0.5511128619263345, "grad_norm": 1.61617910861969, "learning_rate": 8.832905436642125e-05, "loss": 1.4681, "step": 15389 }, { "epoch": 0.5511486740559027, "grad_norm": 1.5911004543304443, "learning_rate": 8.831753469851126e-05, "loss": 1.537, "step": 15390 }, { "epoch": 0.551184486185471, "grad_norm": 1.8469878435134888, "learning_rate": 8.830601518777375e-05, "loss": 1.4466, "step": 15391 }, { "epoch": 0.5512202983150393, "grad_norm": 1.4108341932296753, "learning_rate": 8.829449583436367e-05, "loss": 1.4935, "step": 15392 }, { "epoch": 0.5512561104446075, "grad_norm": 1.4014348983764648, "learning_rate": 8.828297663843612e-05, "loss": 1.5126, "step": 15393 }, { "epoch": 0.5512919225741759, "grad_norm": 2.2653427124023438, "learning_rate": 8.827145760014595e-05, "loss": 1.7555, "step": 15394 }, { "epoch": 0.5513277347037442, "grad_norm": 1.639029622077942, "learning_rate": 8.825993871964823e-05, "loss": 1.6526, "step": 15395 }, { "epoch": 0.5513635468333125, "grad_norm": 1.7813209295272827, "learning_rate": 8.824841999709785e-05, "loss": 1.1952, "step": 15396 }, { "epoch": 0.5513993589628807, "grad_norm": 1.8362061977386475, "learning_rate": 8.823690143264988e-05, "loss": 1.6557, "step": 15397 }, { "epoch": 0.551435171092449, "grad_norm": 1.6683216094970703, "learning_rate": 8.822538302645916e-05, "loss": 1.7202, "step": 15398 }, { "epoch": 0.5514709832220173, "grad_norm": 1.9283270835876465, "learning_rate": 8.821386477868078e-05, "loss": 1.1112, "step": 15399 }, { "epoch": 0.5515067953515855, "grad_norm": 1.510189175605774, "learning_rate": 8.820234668946963e-05, "loss": 1.5949, "step": 15400 }, { "epoch": 0.5515426074811539, "grad_norm": 1.425412893295288, "learning_rate": 8.819082875898068e-05, "loss": 1.4958, "step": 15401 }, { "epoch": 0.5515784196107222, "grad_norm": 1.7452600002288818, "learning_rate": 8.817931098736891e-05, "loss": 1.4762, "step": 15402 }, { "epoch": 0.5516142317402905, "grad_norm": 1.7585675716400146, "learning_rate": 8.816779337478923e-05, "loss": 1.4523, "step": 15403 }, { "epoch": 0.5516500438698587, "grad_norm": 2.050502300262451, "learning_rate": 8.815627592139665e-05, "loss": 1.1354, "step": 15404 }, { "epoch": 0.551685855999427, "grad_norm": 1.3366676568984985, "learning_rate": 8.814475862734608e-05, "loss": 1.568, "step": 15405 }, { "epoch": 0.5517216681289953, "grad_norm": 1.8632868528366089, "learning_rate": 8.813324149279254e-05, "loss": 1.3182, "step": 15406 }, { "epoch": 0.5517574802585635, "grad_norm": 1.6237183809280396, "learning_rate": 8.812172451789086e-05, "loss": 1.2567, "step": 15407 }, { "epoch": 0.5517932923881319, "grad_norm": 1.8499741554260254, "learning_rate": 8.811020770279612e-05, "loss": 1.4372, "step": 15408 }, { "epoch": 0.5518291045177002, "grad_norm": 1.5876901149749756, "learning_rate": 8.809869104766318e-05, "loss": 1.6619, "step": 15409 }, { "epoch": 0.5518649166472684, "grad_norm": 1.637397289276123, "learning_rate": 8.808717455264698e-05, "loss": 1.7995, "step": 15410 }, { "epoch": 0.5519007287768367, "grad_norm": 1.551072120666504, "learning_rate": 8.80756582179025e-05, "loss": 1.5143, "step": 15411 }, { "epoch": 0.551936540906405, "grad_norm": 1.4706289768218994, "learning_rate": 8.806414204358465e-05, "loss": 1.3013, "step": 15412 }, { "epoch": 0.5519723530359733, "grad_norm": 1.3856925964355469, "learning_rate": 8.805262602984838e-05, "loss": 1.4325, "step": 15413 }, { "epoch": 0.5520081651655415, "grad_norm": 1.3337805271148682, "learning_rate": 8.804111017684858e-05, "loss": 1.4605, "step": 15414 }, { "epoch": 0.5520439772951099, "grad_norm": 1.699302077293396, "learning_rate": 8.802959448474025e-05, "loss": 1.3212, "step": 15415 }, { "epoch": 0.5520797894246782, "grad_norm": 1.4418692588806152, "learning_rate": 8.801807895367827e-05, "loss": 1.4426, "step": 15416 }, { "epoch": 0.5521156015542464, "grad_norm": 1.6650549173355103, "learning_rate": 8.80065635838176e-05, "loss": 1.3448, "step": 15417 }, { "epoch": 0.5521514136838147, "grad_norm": 1.9519139528274536, "learning_rate": 8.799504837531315e-05, "loss": 1.7081, "step": 15418 }, { "epoch": 0.552187225813383, "grad_norm": 1.3951752185821533, "learning_rate": 8.798353332831981e-05, "loss": 1.3193, "step": 15419 }, { "epoch": 0.5522230379429512, "grad_norm": 1.665259599685669, "learning_rate": 8.797201844299257e-05, "loss": 1.5382, "step": 15420 }, { "epoch": 0.5522588500725195, "grad_norm": 1.426015019416809, "learning_rate": 8.796050371948627e-05, "loss": 1.5341, "step": 15421 }, { "epoch": 0.5522946622020879, "grad_norm": 1.8713806867599487, "learning_rate": 8.794898915795588e-05, "loss": 1.6166, "step": 15422 }, { "epoch": 0.5523304743316562, "grad_norm": 1.9937385320663452, "learning_rate": 8.793747475855628e-05, "loss": 1.2537, "step": 15423 }, { "epoch": 0.5523662864612244, "grad_norm": 1.5066399574279785, "learning_rate": 8.792596052144242e-05, "loss": 1.7567, "step": 15424 }, { "epoch": 0.5524020985907927, "grad_norm": 1.2568379640579224, "learning_rate": 8.791444644676916e-05, "loss": 1.3674, "step": 15425 }, { "epoch": 0.552437910720361, "grad_norm": 1.4235587120056152, "learning_rate": 8.790293253469145e-05, "loss": 1.661, "step": 15426 }, { "epoch": 0.5524737228499292, "grad_norm": 2.1775028705596924, "learning_rate": 8.789141878536419e-05, "loss": 1.3307, "step": 15427 }, { "epoch": 0.5525095349794975, "grad_norm": 1.7875803709030151, "learning_rate": 8.787990519894224e-05, "loss": 1.4441, "step": 15428 }, { "epoch": 0.5525453471090659, "grad_norm": 1.3177114725112915, "learning_rate": 8.786839177558057e-05, "loss": 1.7458, "step": 15429 }, { "epoch": 0.5525811592386342, "grad_norm": 1.7923847436904907, "learning_rate": 8.7856878515434e-05, "loss": 1.6531, "step": 15430 }, { "epoch": 0.5526169713682024, "grad_norm": 1.7845262289047241, "learning_rate": 8.784536541865752e-05, "loss": 1.382, "step": 15431 }, { "epoch": 0.5526527834977707, "grad_norm": 1.7725820541381836, "learning_rate": 8.783385248540591e-05, "loss": 1.2906, "step": 15432 }, { "epoch": 0.552688595627339, "grad_norm": 1.6670875549316406, "learning_rate": 8.782233971583416e-05, "loss": 1.7273, "step": 15433 }, { "epoch": 0.5527244077569072, "grad_norm": 2.1010894775390625, "learning_rate": 8.781082711009709e-05, "loss": 1.7307, "step": 15434 }, { "epoch": 0.5527602198864755, "grad_norm": 1.56618070602417, "learning_rate": 8.779931466834965e-05, "loss": 1.2643, "step": 15435 }, { "epoch": 0.5527960320160439, "grad_norm": 1.57991623878479, "learning_rate": 8.778780239074669e-05, "loss": 1.3456, "step": 15436 }, { "epoch": 0.5528318441456122, "grad_norm": 2.865915060043335, "learning_rate": 8.777629027744307e-05, "loss": 1.5232, "step": 15437 }, { "epoch": 0.5528676562751804, "grad_norm": 1.6558449268341064, "learning_rate": 8.776477832859374e-05, "loss": 1.4121, "step": 15438 }, { "epoch": 0.5529034684047487, "grad_norm": 2.0313730239868164, "learning_rate": 8.77532665443535e-05, "loss": 1.3529, "step": 15439 }, { "epoch": 0.552939280534317, "grad_norm": 1.625325322151184, "learning_rate": 8.774175492487728e-05, "loss": 1.4956, "step": 15440 }, { "epoch": 0.5529750926638852, "grad_norm": 1.4914642572402954, "learning_rate": 8.77302434703199e-05, "loss": 1.3326, "step": 15441 }, { "epoch": 0.5530109047934535, "grad_norm": 2.1764352321624756, "learning_rate": 8.771873218083631e-05, "loss": 1.5886, "step": 15442 }, { "epoch": 0.5530467169230219, "grad_norm": 1.5987873077392578, "learning_rate": 8.770722105658132e-05, "loss": 1.3776, "step": 15443 }, { "epoch": 0.5530825290525901, "grad_norm": 1.513716459274292, "learning_rate": 8.769571009770982e-05, "loss": 1.3559, "step": 15444 }, { "epoch": 0.5531183411821584, "grad_norm": 1.3887488842010498, "learning_rate": 8.768419930437667e-05, "loss": 1.6609, "step": 15445 }, { "epoch": 0.5531541533117267, "grad_norm": 1.3617610931396484, "learning_rate": 8.767268867673671e-05, "loss": 1.6054, "step": 15446 }, { "epoch": 0.553189965441295, "grad_norm": 1.9251405000686646, "learning_rate": 8.766117821494485e-05, "loss": 1.323, "step": 15447 }, { "epoch": 0.5532257775708632, "grad_norm": 2.4319498538970947, "learning_rate": 8.76496679191559e-05, "loss": 1.7116, "step": 15448 }, { "epoch": 0.5532615897004315, "grad_norm": 2.2947161197662354, "learning_rate": 8.763815778952475e-05, "loss": 1.504, "step": 15449 }, { "epoch": 0.5532974018299999, "grad_norm": 1.3940249681472778, "learning_rate": 8.762664782620623e-05, "loss": 1.6442, "step": 15450 }, { "epoch": 0.5533332139595681, "grad_norm": 1.715877890586853, "learning_rate": 8.761513802935523e-05, "loss": 1.6743, "step": 15451 }, { "epoch": 0.5533690260891364, "grad_norm": 1.2846055030822754, "learning_rate": 8.760362839912654e-05, "loss": 1.4043, "step": 15452 }, { "epoch": 0.5534048382187047, "grad_norm": 1.3711576461791992, "learning_rate": 8.759211893567505e-05, "loss": 1.5188, "step": 15453 }, { "epoch": 0.5534406503482729, "grad_norm": 1.988338589668274, "learning_rate": 8.758060963915562e-05, "loss": 1.5419, "step": 15454 }, { "epoch": 0.5534764624778412, "grad_norm": 2.278092384338379, "learning_rate": 8.756910050972304e-05, "loss": 1.5371, "step": 15455 }, { "epoch": 0.5535122746074095, "grad_norm": 1.3961970806121826, "learning_rate": 8.755759154753219e-05, "loss": 1.3566, "step": 15456 }, { "epoch": 0.5535480867369779, "grad_norm": 1.6591577529907227, "learning_rate": 8.754608275273788e-05, "loss": 1.2977, "step": 15457 }, { "epoch": 0.5535838988665461, "grad_norm": 1.8924493789672852, "learning_rate": 8.753457412549497e-05, "loss": 1.4803, "step": 15458 }, { "epoch": 0.5536197109961144, "grad_norm": 1.7182888984680176, "learning_rate": 8.752306566595828e-05, "loss": 1.3179, "step": 15459 }, { "epoch": 0.5536555231256827, "grad_norm": 1.6327793598175049, "learning_rate": 8.751155737428267e-05, "loss": 1.6394, "step": 15460 }, { "epoch": 0.5536913352552509, "grad_norm": 1.3792765140533447, "learning_rate": 8.750004925062296e-05, "loss": 1.649, "step": 15461 }, { "epoch": 0.5537271473848192, "grad_norm": 1.5709487199783325, "learning_rate": 8.74885412951339e-05, "loss": 1.5722, "step": 15462 }, { "epoch": 0.5537629595143875, "grad_norm": 2.683051347732544, "learning_rate": 8.747703350797044e-05, "loss": 1.532, "step": 15463 }, { "epoch": 0.5537987716439559, "grad_norm": 1.412434458732605, "learning_rate": 8.74655258892873e-05, "loss": 1.4681, "step": 15464 }, { "epoch": 0.5538345837735241, "grad_norm": 1.9447873830795288, "learning_rate": 8.745401843923936e-05, "loss": 1.7162, "step": 15465 }, { "epoch": 0.5538703959030924, "grad_norm": 1.498840093612671, "learning_rate": 8.74425111579814e-05, "loss": 1.3753, "step": 15466 }, { "epoch": 0.5539062080326607, "grad_norm": 1.703001618385315, "learning_rate": 8.743100404566828e-05, "loss": 1.6041, "step": 15467 }, { "epoch": 0.5539420201622289, "grad_norm": 1.341660737991333, "learning_rate": 8.741949710245476e-05, "loss": 1.349, "step": 15468 }, { "epoch": 0.5539778322917972, "grad_norm": 2.8604416847229004, "learning_rate": 8.740799032849572e-05, "loss": 1.397, "step": 15469 }, { "epoch": 0.5540136444213655, "grad_norm": 1.7713994979858398, "learning_rate": 8.739648372394592e-05, "loss": 1.7506, "step": 15470 }, { "epoch": 0.5540494565509338, "grad_norm": 1.9339109659194946, "learning_rate": 8.738497728896013e-05, "loss": 1.3016, "step": 15471 }, { "epoch": 0.5540852686805021, "grad_norm": 1.7749813795089722, "learning_rate": 8.737347102369325e-05, "loss": 1.0844, "step": 15472 }, { "epoch": 0.5541210808100704, "grad_norm": 2.379054307937622, "learning_rate": 8.736196492829997e-05, "loss": 1.71, "step": 15473 }, { "epoch": 0.5541568929396387, "grad_norm": 2.4072840213775635, "learning_rate": 8.735045900293522e-05, "loss": 1.4868, "step": 15474 }, { "epoch": 0.5541927050692069, "grad_norm": 1.7160054445266724, "learning_rate": 8.733895324775366e-05, "loss": 1.4881, "step": 15475 }, { "epoch": 0.5542285171987752, "grad_norm": 2.1116483211517334, "learning_rate": 8.73274476629102e-05, "loss": 1.5777, "step": 15476 }, { "epoch": 0.5542643293283435, "grad_norm": 1.7254281044006348, "learning_rate": 8.731594224855956e-05, "loss": 1.5687, "step": 15477 }, { "epoch": 0.5543001414579118, "grad_norm": 1.8794066905975342, "learning_rate": 8.730443700485658e-05, "loss": 1.4314, "step": 15478 }, { "epoch": 0.5543359535874801, "grad_norm": 1.509204387664795, "learning_rate": 8.729293193195603e-05, "loss": 1.3987, "step": 15479 }, { "epoch": 0.5543717657170484, "grad_norm": 1.610112190246582, "learning_rate": 8.728142703001264e-05, "loss": 1.4781, "step": 15480 }, { "epoch": 0.5544075778466167, "grad_norm": 2.039473295211792, "learning_rate": 8.72699222991813e-05, "loss": 1.499, "step": 15481 }, { "epoch": 0.5544433899761849, "grad_norm": 3.141580581665039, "learning_rate": 8.725841773961669e-05, "loss": 1.7102, "step": 15482 }, { "epoch": 0.5544792021057532, "grad_norm": 2.184549570083618, "learning_rate": 8.724691335147367e-05, "loss": 1.6045, "step": 15483 }, { "epoch": 0.5545150142353215, "grad_norm": 1.9326050281524658, "learning_rate": 8.723540913490693e-05, "loss": 1.5926, "step": 15484 }, { "epoch": 0.5545508263648898, "grad_norm": 1.7067333459854126, "learning_rate": 8.722390509007137e-05, "loss": 1.3034, "step": 15485 }, { "epoch": 0.5545866384944581, "grad_norm": 1.8227620124816895, "learning_rate": 8.721240121712161e-05, "loss": 1.5729, "step": 15486 }, { "epoch": 0.5546224506240264, "grad_norm": 1.3171581029891968, "learning_rate": 8.720089751621256e-05, "loss": 1.4384, "step": 15487 }, { "epoch": 0.5546582627535946, "grad_norm": 1.4920912981033325, "learning_rate": 8.71893939874989e-05, "loss": 1.294, "step": 15488 }, { "epoch": 0.5546940748831629, "grad_norm": 1.4924278259277344, "learning_rate": 8.717789063113539e-05, "loss": 1.5305, "step": 15489 }, { "epoch": 0.5547298870127312, "grad_norm": 1.9032200574874878, "learning_rate": 8.716638744727687e-05, "loss": 1.2737, "step": 15490 }, { "epoch": 0.5547656991422995, "grad_norm": 1.732216477394104, "learning_rate": 8.7154884436078e-05, "loss": 1.2741, "step": 15491 }, { "epoch": 0.5548015112718678, "grad_norm": 1.5945426225662231, "learning_rate": 8.714338159769366e-05, "loss": 1.5329, "step": 15492 }, { "epoch": 0.5548373234014361, "grad_norm": 1.8944737911224365, "learning_rate": 8.713187893227847e-05, "loss": 1.2802, "step": 15493 }, { "epoch": 0.5548731355310044, "grad_norm": 1.787889003753662, "learning_rate": 8.71203764399873e-05, "loss": 1.5427, "step": 15494 }, { "epoch": 0.5549089476605726, "grad_norm": 1.3428040742874146, "learning_rate": 8.71088741209748e-05, "loss": 1.4309, "step": 15495 }, { "epoch": 0.5549447597901409, "grad_norm": 1.8700381517410278, "learning_rate": 8.709737197539583e-05, "loss": 1.6271, "step": 15496 }, { "epoch": 0.5549805719197092, "grad_norm": 1.4687343835830688, "learning_rate": 8.708587000340506e-05, "loss": 1.4383, "step": 15497 }, { "epoch": 0.5550163840492774, "grad_norm": 1.5101184844970703, "learning_rate": 8.707436820515723e-05, "loss": 1.4074, "step": 15498 }, { "epoch": 0.5550521961788458, "grad_norm": 1.7550967931747437, "learning_rate": 8.706286658080711e-05, "loss": 1.6502, "step": 15499 }, { "epoch": 0.5550880083084141, "grad_norm": 1.6945569515228271, "learning_rate": 8.705136513050944e-05, "loss": 1.2085, "step": 15500 }, { "epoch": 0.5551238204379824, "grad_norm": 1.832401990890503, "learning_rate": 8.703986385441895e-05, "loss": 1.4714, "step": 15501 }, { "epoch": 0.5551596325675506, "grad_norm": 1.4257477521896362, "learning_rate": 8.702836275269033e-05, "loss": 1.3904, "step": 15502 }, { "epoch": 0.5551954446971189, "grad_norm": 1.3720061779022217, "learning_rate": 8.701686182547842e-05, "loss": 1.4555, "step": 15503 }, { "epoch": 0.5552312568266872, "grad_norm": 1.8269095420837402, "learning_rate": 8.700536107293784e-05, "loss": 1.4776, "step": 15504 }, { "epoch": 0.5552670689562554, "grad_norm": 1.655152440071106, "learning_rate": 8.699386049522341e-05, "loss": 1.6387, "step": 15505 }, { "epoch": 0.5553028810858238, "grad_norm": 1.5963819026947021, "learning_rate": 8.69823600924898e-05, "loss": 1.5153, "step": 15506 }, { "epoch": 0.5553386932153921, "grad_norm": 1.3038275241851807, "learning_rate": 8.697085986489172e-05, "loss": 1.6874, "step": 15507 }, { "epoch": 0.5553745053449604, "grad_norm": 1.676790714263916, "learning_rate": 8.695935981258394e-05, "loss": 1.2814, "step": 15508 }, { "epoch": 0.5554103174745286, "grad_norm": 1.8038157224655151, "learning_rate": 8.694785993572112e-05, "loss": 1.3005, "step": 15509 }, { "epoch": 0.5554461296040969, "grad_norm": 1.5027525424957275, "learning_rate": 8.693636023445804e-05, "loss": 1.4746, "step": 15510 }, { "epoch": 0.5554819417336652, "grad_norm": 1.7654838562011719, "learning_rate": 8.692486070894935e-05, "loss": 1.6604, "step": 15511 }, { "epoch": 0.5555177538632334, "grad_norm": 1.8913116455078125, "learning_rate": 8.691336135934982e-05, "loss": 1.6467, "step": 15512 }, { "epoch": 0.5555535659928018, "grad_norm": 1.5639491081237793, "learning_rate": 8.69018621858141e-05, "loss": 1.033, "step": 15513 }, { "epoch": 0.5555893781223701, "grad_norm": 1.46904718875885, "learning_rate": 8.689036318849697e-05, "loss": 1.4832, "step": 15514 }, { "epoch": 0.5556251902519384, "grad_norm": 1.6029126644134521, "learning_rate": 8.68788643675531e-05, "loss": 1.5136, "step": 15515 }, { "epoch": 0.5556610023815066, "grad_norm": 1.6579877138137817, "learning_rate": 8.686736572313714e-05, "loss": 1.373, "step": 15516 }, { "epoch": 0.5556968145110749, "grad_norm": 1.6767444610595703, "learning_rate": 8.685586725540387e-05, "loss": 1.3224, "step": 15517 }, { "epoch": 0.5557326266406432, "grad_norm": 1.8820594549179077, "learning_rate": 8.684436896450791e-05, "loss": 1.6569, "step": 15518 }, { "epoch": 0.5557684387702114, "grad_norm": 1.8585944175720215, "learning_rate": 8.683287085060404e-05, "loss": 1.594, "step": 15519 }, { "epoch": 0.5558042508997798, "grad_norm": 1.6034525632858276, "learning_rate": 8.682137291384687e-05, "loss": 1.4458, "step": 15520 }, { "epoch": 0.5558400630293481, "grad_norm": 1.3279809951782227, "learning_rate": 8.680987515439116e-05, "loss": 1.4297, "step": 15521 }, { "epoch": 0.5558758751589163, "grad_norm": 1.51862370967865, "learning_rate": 8.679837757239156e-05, "loss": 1.6286, "step": 15522 }, { "epoch": 0.5559116872884846, "grad_norm": 1.6439845561981201, "learning_rate": 8.678688016800276e-05, "loss": 1.2965, "step": 15523 }, { "epoch": 0.5559474994180529, "grad_norm": 1.7435225248336792, "learning_rate": 8.677538294137945e-05, "loss": 1.6844, "step": 15524 }, { "epoch": 0.5559833115476212, "grad_norm": 1.3427046537399292, "learning_rate": 8.676388589267628e-05, "loss": 1.3433, "step": 15525 }, { "epoch": 0.5560191236771894, "grad_norm": 1.8325779438018799, "learning_rate": 8.675238902204797e-05, "loss": 1.5351, "step": 15526 }, { "epoch": 0.5560549358067578, "grad_norm": 1.5658588409423828, "learning_rate": 8.674089232964916e-05, "loss": 1.7484, "step": 15527 }, { "epoch": 0.5560907479363261, "grad_norm": 1.382843017578125, "learning_rate": 8.672939581563456e-05, "loss": 1.4252, "step": 15528 }, { "epoch": 0.5561265600658943, "grad_norm": 2.3734495639801025, "learning_rate": 8.67178994801588e-05, "loss": 1.3529, "step": 15529 }, { "epoch": 0.5561623721954626, "grad_norm": 1.3029054403305054, "learning_rate": 8.67064033233766e-05, "loss": 1.331, "step": 15530 }, { "epoch": 0.5561981843250309, "grad_norm": 1.900862455368042, "learning_rate": 8.669490734544256e-05, "loss": 1.6135, "step": 15531 }, { "epoch": 0.5562339964545991, "grad_norm": 1.9537557363510132, "learning_rate": 8.668341154651141e-05, "loss": 1.5744, "step": 15532 }, { "epoch": 0.5562698085841674, "grad_norm": 1.6260302066802979, "learning_rate": 8.667191592673779e-05, "loss": 1.5679, "step": 15533 }, { "epoch": 0.5563056207137358, "grad_norm": 2.043226957321167, "learning_rate": 8.666042048627632e-05, "loss": 1.6189, "step": 15534 }, { "epoch": 0.5563414328433041, "grad_norm": 1.6133548021316528, "learning_rate": 8.66489252252817e-05, "loss": 1.6424, "step": 15535 }, { "epoch": 0.5563772449728723, "grad_norm": 1.5509310960769653, "learning_rate": 8.663743014390855e-05, "loss": 1.4819, "step": 15536 }, { "epoch": 0.5564130571024406, "grad_norm": 1.5829284191131592, "learning_rate": 8.662593524231158e-05, "loss": 1.2861, "step": 15537 }, { "epoch": 0.5564488692320089, "grad_norm": 1.2532970905303955, "learning_rate": 8.661444052064536e-05, "loss": 1.128, "step": 15538 }, { "epoch": 0.5564846813615771, "grad_norm": 1.8142882585525513, "learning_rate": 8.66029459790646e-05, "loss": 1.4828, "step": 15539 }, { "epoch": 0.5565204934911454, "grad_norm": 1.6689441204071045, "learning_rate": 8.65914516177239e-05, "loss": 1.5474, "step": 15540 }, { "epoch": 0.5565563056207138, "grad_norm": 1.3482906818389893, "learning_rate": 8.657995743677793e-05, "loss": 1.5433, "step": 15541 }, { "epoch": 0.5565921177502821, "grad_norm": 2.2926156520843506, "learning_rate": 8.656846343638135e-05, "loss": 1.6321, "step": 15542 }, { "epoch": 0.5566279298798503, "grad_norm": 1.465704083442688, "learning_rate": 8.655696961668873e-05, "loss": 1.5127, "step": 15543 }, { "epoch": 0.5566637420094186, "grad_norm": 2.068370819091797, "learning_rate": 8.654547597785478e-05, "loss": 1.6719, "step": 15544 }, { "epoch": 0.5566995541389869, "grad_norm": 1.6043318510055542, "learning_rate": 8.653398252003406e-05, "loss": 1.426, "step": 15545 }, { "epoch": 0.5567353662685551, "grad_norm": 1.5261003971099854, "learning_rate": 8.652248924338126e-05, "loss": 1.507, "step": 15546 }, { "epoch": 0.5567711783981234, "grad_norm": 1.734169363975525, "learning_rate": 8.651099614805097e-05, "loss": 1.516, "step": 15547 }, { "epoch": 0.5568069905276918, "grad_norm": 1.581032633781433, "learning_rate": 8.649950323419783e-05, "loss": 1.2642, "step": 15548 }, { "epoch": 0.55684280265726, "grad_norm": 1.9459565877914429, "learning_rate": 8.648801050197646e-05, "loss": 1.6195, "step": 15549 }, { "epoch": 0.5568786147868283, "grad_norm": 1.9734375476837158, "learning_rate": 8.647651795154148e-05, "loss": 1.366, "step": 15550 }, { "epoch": 0.5569144269163966, "grad_norm": 1.4467110633850098, "learning_rate": 8.646502558304751e-05, "loss": 1.4128, "step": 15551 }, { "epoch": 0.5569502390459649, "grad_norm": 2.0393340587615967, "learning_rate": 8.645353339664915e-05, "loss": 1.4668, "step": 15552 }, { "epoch": 0.5569860511755331, "grad_norm": 1.865412950515747, "learning_rate": 8.644204139250105e-05, "loss": 1.4342, "step": 15553 }, { "epoch": 0.5570218633051014, "grad_norm": 1.486951470375061, "learning_rate": 8.643054957075776e-05, "loss": 1.3204, "step": 15554 }, { "epoch": 0.5570576754346698, "grad_norm": 1.633739709854126, "learning_rate": 8.641905793157395e-05, "loss": 1.3796, "step": 15555 }, { "epoch": 0.557093487564238, "grad_norm": 1.7872612476348877, "learning_rate": 8.640756647510417e-05, "loss": 1.6145, "step": 15556 }, { "epoch": 0.5571292996938063, "grad_norm": 1.5471066236495972, "learning_rate": 8.639607520150308e-05, "loss": 1.4271, "step": 15557 }, { "epoch": 0.5571651118233746, "grad_norm": 1.6009304523468018, "learning_rate": 8.638458411092527e-05, "loss": 1.2723, "step": 15558 }, { "epoch": 0.5572009239529429, "grad_norm": 1.389464259147644, "learning_rate": 8.637309320352526e-05, "loss": 1.6569, "step": 15559 }, { "epoch": 0.5572367360825111, "grad_norm": 1.8739726543426514, "learning_rate": 8.636160247945774e-05, "loss": 1.6658, "step": 15560 }, { "epoch": 0.5572725482120794, "grad_norm": 1.4336637258529663, "learning_rate": 8.635011193887725e-05, "loss": 1.4241, "step": 15561 }, { "epoch": 0.5573083603416478, "grad_norm": 1.8983516693115234, "learning_rate": 8.633862158193841e-05, "loss": 1.4288, "step": 15562 }, { "epoch": 0.557344172471216, "grad_norm": 1.7363568544387817, "learning_rate": 8.632713140879577e-05, "loss": 1.6088, "step": 15563 }, { "epoch": 0.5573799846007843, "grad_norm": 1.7875018119812012, "learning_rate": 8.631564141960397e-05, "loss": 1.4726, "step": 15564 }, { "epoch": 0.5574157967303526, "grad_norm": 1.811866044998169, "learning_rate": 8.630415161451754e-05, "loss": 1.6785, "step": 15565 }, { "epoch": 0.5574516088599208, "grad_norm": 1.505098581314087, "learning_rate": 8.62926619936911e-05, "loss": 1.4012, "step": 15566 }, { "epoch": 0.5574874209894891, "grad_norm": 1.4308191537857056, "learning_rate": 8.628117255727924e-05, "loss": 1.4559, "step": 15567 }, { "epoch": 0.5575232331190574, "grad_norm": 1.7727469205856323, "learning_rate": 8.626968330543643e-05, "loss": 1.4809, "step": 15568 }, { "epoch": 0.5575590452486258, "grad_norm": 1.7335854768753052, "learning_rate": 8.62581942383174e-05, "loss": 1.5017, "step": 15569 }, { "epoch": 0.557594857378194, "grad_norm": 1.486517071723938, "learning_rate": 8.624670535607658e-05, "loss": 1.3723, "step": 15570 }, { "epoch": 0.5576306695077623, "grad_norm": 2.2205562591552734, "learning_rate": 8.623521665886865e-05, "loss": 1.7048, "step": 15571 }, { "epoch": 0.5576664816373306, "grad_norm": 1.4245879650115967, "learning_rate": 8.622372814684806e-05, "loss": 1.4024, "step": 15572 }, { "epoch": 0.5577022937668988, "grad_norm": 1.3684970140457153, "learning_rate": 8.621223982016948e-05, "loss": 1.4418, "step": 15573 }, { "epoch": 0.5577381058964671, "grad_norm": 1.8130236864089966, "learning_rate": 8.620075167898743e-05, "loss": 1.4848, "step": 15574 }, { "epoch": 0.5577739180260354, "grad_norm": 1.3739123344421387, "learning_rate": 8.618926372345645e-05, "loss": 1.583, "step": 15575 }, { "epoch": 0.5578097301556038, "grad_norm": 1.366071105003357, "learning_rate": 8.617777595373117e-05, "loss": 1.4026, "step": 15576 }, { "epoch": 0.557845542285172, "grad_norm": 1.5953060388565063, "learning_rate": 8.6166288369966e-05, "loss": 1.2789, "step": 15577 }, { "epoch": 0.5578813544147403, "grad_norm": 1.4939026832580566, "learning_rate": 8.615480097231564e-05, "loss": 1.1593, "step": 15578 }, { "epoch": 0.5579171665443086, "grad_norm": 1.7710267305374146, "learning_rate": 8.614331376093452e-05, "loss": 1.7925, "step": 15579 }, { "epoch": 0.5579529786738768, "grad_norm": 1.9933295249938965, "learning_rate": 8.613182673597729e-05, "loss": 1.5553, "step": 15580 }, { "epoch": 0.5579887908034451, "grad_norm": 2.1700258255004883, "learning_rate": 8.612033989759838e-05, "loss": 1.4692, "step": 15581 }, { "epoch": 0.5580246029330134, "grad_norm": 1.4422943592071533, "learning_rate": 8.610885324595249e-05, "loss": 1.5315, "step": 15582 }, { "epoch": 0.5580604150625817, "grad_norm": 1.459005355834961, "learning_rate": 8.609736678119396e-05, "loss": 1.4583, "step": 15583 }, { "epoch": 0.55809622719215, "grad_norm": 1.8012620210647583, "learning_rate": 8.60858805034775e-05, "loss": 1.6828, "step": 15584 }, { "epoch": 0.5581320393217183, "grad_norm": 1.4135667085647583, "learning_rate": 8.607439441295755e-05, "loss": 1.4163, "step": 15585 }, { "epoch": 0.5581678514512866, "grad_norm": 1.6235884428024292, "learning_rate": 8.606290850978862e-05, "loss": 1.4135, "step": 15586 }, { "epoch": 0.5582036635808548, "grad_norm": 2.004629611968994, "learning_rate": 8.605142279412533e-05, "loss": 1.7444, "step": 15587 }, { "epoch": 0.5582394757104231, "grad_norm": 1.713678002357483, "learning_rate": 8.60399372661221e-05, "loss": 1.7045, "step": 15588 }, { "epoch": 0.5582752878399914, "grad_norm": 1.9293084144592285, "learning_rate": 8.602845192593359e-05, "loss": 1.1365, "step": 15589 }, { "epoch": 0.5583110999695597, "grad_norm": 1.6164830923080444, "learning_rate": 8.601696677371414e-05, "loss": 1.6059, "step": 15590 }, { "epoch": 0.558346912099128, "grad_norm": 2.2314774990081787, "learning_rate": 8.600548180961845e-05, "loss": 1.342, "step": 15591 }, { "epoch": 0.5583827242286963, "grad_norm": 1.5393816232681274, "learning_rate": 8.599399703380087e-05, "loss": 1.3335, "step": 15592 }, { "epoch": 0.5584185363582646, "grad_norm": 1.6473439931869507, "learning_rate": 8.598251244641608e-05, "loss": 1.4625, "step": 15593 }, { "epoch": 0.5584543484878328, "grad_norm": 1.7329156398773193, "learning_rate": 8.597102804761846e-05, "loss": 1.6533, "step": 15594 }, { "epoch": 0.5584901606174011, "grad_norm": 1.7898093461990356, "learning_rate": 8.595954383756256e-05, "loss": 1.3945, "step": 15595 }, { "epoch": 0.5585259727469694, "grad_norm": 1.6420942544937134, "learning_rate": 8.594805981640289e-05, "loss": 1.5115, "step": 15596 }, { "epoch": 0.5585617848765377, "grad_norm": 1.4303438663482666, "learning_rate": 8.593657598429395e-05, "loss": 1.4199, "step": 15597 }, { "epoch": 0.558597597006106, "grad_norm": 1.711222767829895, "learning_rate": 8.592509234139023e-05, "loss": 1.3911, "step": 15598 }, { "epoch": 0.5586334091356743, "grad_norm": 1.6813573837280273, "learning_rate": 8.591360888784622e-05, "loss": 1.3908, "step": 15599 }, { "epoch": 0.5586692212652425, "grad_norm": 1.3513386249542236, "learning_rate": 8.590212562381649e-05, "loss": 1.5484, "step": 15600 }, { "epoch": 0.5587050333948108, "grad_norm": 2.3890459537506104, "learning_rate": 8.589064254945541e-05, "loss": 1.5111, "step": 15601 }, { "epoch": 0.5587408455243791, "grad_norm": 1.4889732599258423, "learning_rate": 8.58791596649176e-05, "loss": 1.4165, "step": 15602 }, { "epoch": 0.5587766576539474, "grad_norm": 2.1360843181610107, "learning_rate": 8.586767697035745e-05, "loss": 1.5494, "step": 15603 }, { "epoch": 0.5588124697835157, "grad_norm": 1.2866389751434326, "learning_rate": 8.585619446592947e-05, "loss": 1.4871, "step": 15604 }, { "epoch": 0.558848281913084, "grad_norm": 1.881407380104065, "learning_rate": 8.584471215178817e-05, "loss": 1.5327, "step": 15605 }, { "epoch": 0.5588840940426523, "grad_norm": 2.2813119888305664, "learning_rate": 8.583323002808798e-05, "loss": 1.8198, "step": 15606 }, { "epoch": 0.5589199061722205, "grad_norm": 1.8343212604522705, "learning_rate": 8.582174809498343e-05, "loss": 1.5162, "step": 15607 }, { "epoch": 0.5589557183017888, "grad_norm": 1.4812381267547607, "learning_rate": 8.581026635262894e-05, "loss": 1.6672, "step": 15608 }, { "epoch": 0.5589915304313571, "grad_norm": 1.795681357383728, "learning_rate": 8.579878480117906e-05, "loss": 1.361, "step": 15609 }, { "epoch": 0.5590273425609253, "grad_norm": 1.547515630722046, "learning_rate": 8.578730344078817e-05, "loss": 1.1694, "step": 15610 }, { "epoch": 0.5590631546904937, "grad_norm": 2.669339656829834, "learning_rate": 8.577582227161081e-05, "loss": 1.6002, "step": 15611 }, { "epoch": 0.559098966820062, "grad_norm": 1.5305811166763306, "learning_rate": 8.57643412938014e-05, "loss": 1.2915, "step": 15612 }, { "epoch": 0.5591347789496303, "grad_norm": 2.1755855083465576, "learning_rate": 8.575286050751441e-05, "loss": 1.5044, "step": 15613 }, { "epoch": 0.5591705910791985, "grad_norm": 1.5070805549621582, "learning_rate": 8.574137991290432e-05, "loss": 1.6467, "step": 15614 }, { "epoch": 0.5592064032087668, "grad_norm": 1.981643795967102, "learning_rate": 8.572989951012554e-05, "loss": 1.484, "step": 15615 }, { "epoch": 0.5592422153383351, "grad_norm": 1.5245190858840942, "learning_rate": 8.571841929933258e-05, "loss": 1.5356, "step": 15616 }, { "epoch": 0.5592780274679033, "grad_norm": 1.464753270149231, "learning_rate": 8.570693928067986e-05, "loss": 1.2764, "step": 15617 }, { "epoch": 0.5593138395974717, "grad_norm": 1.790344476699829, "learning_rate": 8.569545945432185e-05, "loss": 1.3695, "step": 15618 }, { "epoch": 0.55934965172704, "grad_norm": 2.0477633476257324, "learning_rate": 8.568397982041294e-05, "loss": 1.5747, "step": 15619 }, { "epoch": 0.5593854638566083, "grad_norm": 1.3731274604797363, "learning_rate": 8.567250037910766e-05, "loss": 1.5434, "step": 15620 }, { "epoch": 0.5594212759861765, "grad_norm": 2.218919277191162, "learning_rate": 8.56610211305604e-05, "loss": 1.7308, "step": 15621 }, { "epoch": 0.5594570881157448, "grad_norm": 1.6405763626098633, "learning_rate": 8.564954207492558e-05, "loss": 1.5039, "step": 15622 }, { "epoch": 0.5594929002453131, "grad_norm": 1.4550385475158691, "learning_rate": 8.56380632123577e-05, "loss": 1.303, "step": 15623 }, { "epoch": 0.5595287123748813, "grad_norm": 2.2137668132781982, "learning_rate": 8.56265845430111e-05, "loss": 1.7529, "step": 15624 }, { "epoch": 0.5595645245044497, "grad_norm": 1.7952656745910645, "learning_rate": 8.561510606704031e-05, "loss": 1.4931, "step": 15625 }, { "epoch": 0.559600336634018, "grad_norm": 2.030855417251587, "learning_rate": 8.560362778459968e-05, "loss": 1.302, "step": 15626 }, { "epoch": 0.5596361487635862, "grad_norm": 1.3937506675720215, "learning_rate": 8.55921496958437e-05, "loss": 1.5187, "step": 15627 }, { "epoch": 0.5596719608931545, "grad_norm": 2.013484477996826, "learning_rate": 8.558067180092673e-05, "loss": 1.5022, "step": 15628 }, { "epoch": 0.5597077730227228, "grad_norm": 1.5687587261199951, "learning_rate": 8.556919410000323e-05, "loss": 1.4391, "step": 15629 }, { "epoch": 0.5597435851522911, "grad_norm": 1.5793520212173462, "learning_rate": 8.555771659322765e-05, "loss": 1.5087, "step": 15630 }, { "epoch": 0.5597793972818593, "grad_norm": 1.5034009218215942, "learning_rate": 8.55462392807543e-05, "loss": 1.4148, "step": 15631 }, { "epoch": 0.5598152094114277, "grad_norm": 1.5135999917984009, "learning_rate": 8.55347621627377e-05, "loss": 1.3035, "step": 15632 }, { "epoch": 0.559851021540996, "grad_norm": 1.4237425327301025, "learning_rate": 8.55232852393322e-05, "loss": 1.4377, "step": 15633 }, { "epoch": 0.5598868336705642, "grad_norm": 1.6626579761505127, "learning_rate": 8.551180851069222e-05, "loss": 1.3463, "step": 15634 }, { "epoch": 0.5599226458001325, "grad_norm": 1.5885761976242065, "learning_rate": 8.550033197697218e-05, "loss": 1.3632, "step": 15635 }, { "epoch": 0.5599584579297008, "grad_norm": 1.4434884786605835, "learning_rate": 8.548885563832646e-05, "loss": 1.2362, "step": 15636 }, { "epoch": 0.559994270059269, "grad_norm": 1.8405886888504028, "learning_rate": 8.547737949490946e-05, "loss": 1.6275, "step": 15637 }, { "epoch": 0.5600300821888373, "grad_norm": 1.6832880973815918, "learning_rate": 8.546590354687562e-05, "loss": 1.7815, "step": 15638 }, { "epoch": 0.5600658943184057, "grad_norm": 1.6571459770202637, "learning_rate": 8.545442779437928e-05, "loss": 1.4971, "step": 15639 }, { "epoch": 0.560101706447974, "grad_norm": 1.7768093347549438, "learning_rate": 8.544295223757484e-05, "loss": 1.6878, "step": 15640 }, { "epoch": 0.5601375185775422, "grad_norm": 1.4609851837158203, "learning_rate": 8.543147687661673e-05, "loss": 1.3395, "step": 15641 }, { "epoch": 0.5601733307071105, "grad_norm": 1.545523762702942, "learning_rate": 8.542000171165928e-05, "loss": 1.5403, "step": 15642 }, { "epoch": 0.5602091428366788, "grad_norm": 1.7643197774887085, "learning_rate": 8.540852674285691e-05, "loss": 1.7647, "step": 15643 }, { "epoch": 0.560244954966247, "grad_norm": 1.7095211744308472, "learning_rate": 8.539705197036398e-05, "loss": 1.1087, "step": 15644 }, { "epoch": 0.5602807670958153, "grad_norm": 1.9334076642990112, "learning_rate": 8.53855773943349e-05, "loss": 1.6575, "step": 15645 }, { "epoch": 0.5603165792253836, "grad_norm": 1.9181638956069946, "learning_rate": 8.537410301492398e-05, "loss": 1.4329, "step": 15646 }, { "epoch": 0.560352391354952, "grad_norm": 1.4931620359420776, "learning_rate": 8.53626288322857e-05, "loss": 1.3542, "step": 15647 }, { "epoch": 0.5603882034845202, "grad_norm": 1.7490475177764893, "learning_rate": 8.535115484657434e-05, "loss": 1.2913, "step": 15648 }, { "epoch": 0.5604240156140885, "grad_norm": 1.412573218345642, "learning_rate": 8.533968105794428e-05, "loss": 1.453, "step": 15649 }, { "epoch": 0.5604598277436568, "grad_norm": 1.6083836555480957, "learning_rate": 8.532820746654993e-05, "loss": 1.6949, "step": 15650 }, { "epoch": 0.560495639873225, "grad_norm": 1.9700466394424438, "learning_rate": 8.53167340725456e-05, "loss": 1.4605, "step": 15651 }, { "epoch": 0.5605314520027933, "grad_norm": 1.7195794582366943, "learning_rate": 8.530526087608569e-05, "loss": 1.54, "step": 15652 }, { "epoch": 0.5605672641323616, "grad_norm": 1.3336139917373657, "learning_rate": 8.529378787732451e-05, "loss": 1.5449, "step": 15653 }, { "epoch": 0.56060307626193, "grad_norm": 1.7417322397232056, "learning_rate": 8.528231507641648e-05, "loss": 1.2722, "step": 15654 }, { "epoch": 0.5606388883914982, "grad_norm": 1.8747280836105347, "learning_rate": 8.527084247351595e-05, "loss": 1.2224, "step": 15655 }, { "epoch": 0.5606747005210665, "grad_norm": 1.4631984233856201, "learning_rate": 8.525937006877714e-05, "loss": 1.2657, "step": 15656 }, { "epoch": 0.5607105126506348, "grad_norm": 2.2184841632843018, "learning_rate": 8.524789786235458e-05, "loss": 1.437, "step": 15657 }, { "epoch": 0.560746324780203, "grad_norm": 1.5433226823806763, "learning_rate": 8.523642585440245e-05, "loss": 1.5117, "step": 15658 }, { "epoch": 0.5607821369097713, "grad_norm": 1.7362242937088013, "learning_rate": 8.522495404507521e-05, "loss": 1.7659, "step": 15659 }, { "epoch": 0.5608179490393396, "grad_norm": 1.6187702417373657, "learning_rate": 8.521348243452714e-05, "loss": 1.3126, "step": 15660 }, { "epoch": 0.560853761168908, "grad_norm": 1.7105422019958496, "learning_rate": 8.52020110229126e-05, "loss": 1.468, "step": 15661 }, { "epoch": 0.5608895732984762, "grad_norm": 1.5425986051559448, "learning_rate": 8.51905398103859e-05, "loss": 1.219, "step": 15662 }, { "epoch": 0.5609253854280445, "grad_norm": 1.5455344915390015, "learning_rate": 8.51790687971014e-05, "loss": 1.4256, "step": 15663 }, { "epoch": 0.5609611975576128, "grad_norm": 1.4394350051879883, "learning_rate": 8.516759798321345e-05, "loss": 1.3927, "step": 15664 }, { "epoch": 0.560997009687181, "grad_norm": 1.6390577554702759, "learning_rate": 8.515612736887627e-05, "loss": 1.3861, "step": 15665 }, { "epoch": 0.5610328218167493, "grad_norm": 1.270395040512085, "learning_rate": 8.51446569542443e-05, "loss": 1.0775, "step": 15666 }, { "epoch": 0.5610686339463176, "grad_norm": 2.1888267993927, "learning_rate": 8.513318673947173e-05, "loss": 1.8735, "step": 15667 }, { "epoch": 0.5611044460758859, "grad_norm": 1.4443387985229492, "learning_rate": 8.512171672471305e-05, "loss": 1.3064, "step": 15668 }, { "epoch": 0.5611402582054542, "grad_norm": 1.3692225217819214, "learning_rate": 8.51102469101224e-05, "loss": 1.2717, "step": 15669 }, { "epoch": 0.5611760703350225, "grad_norm": 1.8987263441085815, "learning_rate": 8.509877729585423e-05, "loss": 1.5354, "step": 15670 }, { "epoch": 0.5612118824645907, "grad_norm": 1.4163073301315308, "learning_rate": 8.508730788206273e-05, "loss": 1.1945, "step": 15671 }, { "epoch": 0.561247694594159, "grad_norm": 1.583075761795044, "learning_rate": 8.507583866890233e-05, "loss": 1.669, "step": 15672 }, { "epoch": 0.5612835067237273, "grad_norm": 1.6579574346542358, "learning_rate": 8.506436965652728e-05, "loss": 1.2003, "step": 15673 }, { "epoch": 0.5613193188532956, "grad_norm": 3.4547436237335205, "learning_rate": 8.50529008450918e-05, "loss": 1.6151, "step": 15674 }, { "epoch": 0.5613551309828639, "grad_norm": 1.7894209623336792, "learning_rate": 8.504143223475031e-05, "loss": 1.4897, "step": 15675 }, { "epoch": 0.5613909431124322, "grad_norm": 1.423621416091919, "learning_rate": 8.502996382565702e-05, "loss": 1.6746, "step": 15676 }, { "epoch": 0.5614267552420005, "grad_norm": 1.8405758142471313, "learning_rate": 8.501849561796631e-05, "loss": 1.2419, "step": 15677 }, { "epoch": 0.5614625673715687, "grad_norm": 1.9967997074127197, "learning_rate": 8.500702761183234e-05, "loss": 1.4297, "step": 15678 }, { "epoch": 0.561498379501137, "grad_norm": 2.1513876914978027, "learning_rate": 8.499555980740956e-05, "loss": 1.4807, "step": 15679 }, { "epoch": 0.5615341916307053, "grad_norm": 1.7307462692260742, "learning_rate": 8.498409220485208e-05, "loss": 1.5421, "step": 15680 }, { "epoch": 0.5615700037602736, "grad_norm": 1.9727336168289185, "learning_rate": 8.497262480431435e-05, "loss": 1.393, "step": 15681 }, { "epoch": 0.5616058158898419, "grad_norm": 1.4967458248138428, "learning_rate": 8.496115760595054e-05, "loss": 1.4945, "step": 15682 }, { "epoch": 0.5616416280194102, "grad_norm": 1.8036267757415771, "learning_rate": 8.494969060991493e-05, "loss": 1.5742, "step": 15683 }, { "epoch": 0.5616774401489785, "grad_norm": 1.3200416564941406, "learning_rate": 8.493822381636185e-05, "loss": 1.4853, "step": 15684 }, { "epoch": 0.5617132522785467, "grad_norm": 1.4074757099151611, "learning_rate": 8.49267572254455e-05, "loss": 1.5135, "step": 15685 }, { "epoch": 0.561749064408115, "grad_norm": 1.7859296798706055, "learning_rate": 8.491529083732025e-05, "loss": 1.4796, "step": 15686 }, { "epoch": 0.5617848765376833, "grad_norm": 1.585807204246521, "learning_rate": 8.490382465214025e-05, "loss": 1.5701, "step": 15687 }, { "epoch": 0.5618206886672515, "grad_norm": 2.1410164833068848, "learning_rate": 8.489235867005985e-05, "loss": 1.5223, "step": 15688 }, { "epoch": 0.5618565007968199, "grad_norm": 1.5662782192230225, "learning_rate": 8.488089289123324e-05, "loss": 1.3799, "step": 15689 }, { "epoch": 0.5618923129263882, "grad_norm": 1.8063139915466309, "learning_rate": 8.486942731581478e-05, "loss": 1.5595, "step": 15690 }, { "epoch": 0.5619281250559565, "grad_norm": 2.0988364219665527, "learning_rate": 8.485796194395862e-05, "loss": 1.4958, "step": 15691 }, { "epoch": 0.5619639371855247, "grad_norm": 1.6503610610961914, "learning_rate": 8.484649677581904e-05, "loss": 1.4374, "step": 15692 }, { "epoch": 0.561999749315093, "grad_norm": 2.550079584121704, "learning_rate": 8.483503181155031e-05, "loss": 1.3537, "step": 15693 }, { "epoch": 0.5620355614446613, "grad_norm": 1.5846173763275146, "learning_rate": 8.482356705130665e-05, "loss": 1.4495, "step": 15694 }, { "epoch": 0.5620713735742295, "grad_norm": 1.265183687210083, "learning_rate": 8.481210249524234e-05, "loss": 1.4833, "step": 15695 }, { "epoch": 0.5621071857037979, "grad_norm": 1.5761173963546753, "learning_rate": 8.480063814351159e-05, "loss": 1.3988, "step": 15696 }, { "epoch": 0.5621429978333662, "grad_norm": 1.5768393278121948, "learning_rate": 8.478917399626865e-05, "loss": 1.3578, "step": 15697 }, { "epoch": 0.5621788099629345, "grad_norm": 2.0033600330352783, "learning_rate": 8.477771005366772e-05, "loss": 1.3293, "step": 15698 }, { "epoch": 0.5622146220925027, "grad_norm": 2.234950065612793, "learning_rate": 8.476624631586313e-05, "loss": 1.5952, "step": 15699 }, { "epoch": 0.562250434222071, "grad_norm": 1.814936637878418, "learning_rate": 8.475478278300902e-05, "loss": 1.1814, "step": 15700 }, { "epoch": 0.5622862463516393, "grad_norm": 1.5817487239837646, "learning_rate": 8.474331945525963e-05, "loss": 1.5717, "step": 15701 }, { "epoch": 0.5623220584812075, "grad_norm": 1.440826654434204, "learning_rate": 8.47318563327692e-05, "loss": 1.4432, "step": 15702 }, { "epoch": 0.5623578706107759, "grad_norm": 1.247573733329773, "learning_rate": 8.472039341569195e-05, "loss": 1.5174, "step": 15703 }, { "epoch": 0.5623936827403442, "grad_norm": 1.999563217163086, "learning_rate": 8.470893070418211e-05, "loss": 1.622, "step": 15704 }, { "epoch": 0.5624294948699124, "grad_norm": 2.3003087043762207, "learning_rate": 8.469746819839387e-05, "loss": 1.4743, "step": 15705 }, { "epoch": 0.5624653069994807, "grad_norm": 1.6141663789749146, "learning_rate": 8.468600589848146e-05, "loss": 1.0057, "step": 15706 }, { "epoch": 0.562501119129049, "grad_norm": 1.4969419240951538, "learning_rate": 8.467454380459907e-05, "loss": 1.4611, "step": 15707 }, { "epoch": 0.5625369312586173, "grad_norm": 1.8018219470977783, "learning_rate": 8.466308191690096e-05, "loss": 1.7724, "step": 15708 }, { "epoch": 0.5625727433881855, "grad_norm": 1.618523359298706, "learning_rate": 8.46516202355413e-05, "loss": 1.318, "step": 15709 }, { "epoch": 0.5626085555177539, "grad_norm": 1.5267356634140015, "learning_rate": 8.464015876067425e-05, "loss": 1.5282, "step": 15710 }, { "epoch": 0.5626443676473222, "grad_norm": 2.576260566711426, "learning_rate": 8.462869749245408e-05, "loss": 1.381, "step": 15711 }, { "epoch": 0.5626801797768904, "grad_norm": 1.6371855735778809, "learning_rate": 8.461723643103494e-05, "loss": 1.516, "step": 15712 }, { "epoch": 0.5627159919064587, "grad_norm": 2.0176661014556885, "learning_rate": 8.460577557657107e-05, "loss": 1.5839, "step": 15713 }, { "epoch": 0.562751804036027, "grad_norm": 1.6947283744812012, "learning_rate": 8.45943149292166e-05, "loss": 1.2497, "step": 15714 }, { "epoch": 0.5627876161655953, "grad_norm": 1.763598918914795, "learning_rate": 8.458285448912578e-05, "loss": 1.5134, "step": 15715 }, { "epoch": 0.5628234282951635, "grad_norm": 1.442230463027954, "learning_rate": 8.457139425645273e-05, "loss": 1.3091, "step": 15716 }, { "epoch": 0.5628592404247319, "grad_norm": 2.001328706741333, "learning_rate": 8.455993423135172e-05, "loss": 1.4218, "step": 15717 }, { "epoch": 0.5628950525543002, "grad_norm": 1.904796838760376, "learning_rate": 8.454847441397684e-05, "loss": 1.7912, "step": 15718 }, { "epoch": 0.5629308646838684, "grad_norm": 2.0739176273345947, "learning_rate": 8.45370148044823e-05, "loss": 1.5481, "step": 15719 }, { "epoch": 0.5629666768134367, "grad_norm": 2.203639030456543, "learning_rate": 8.452555540302231e-05, "loss": 1.6342, "step": 15720 }, { "epoch": 0.563002488943005, "grad_norm": 1.9044723510742188, "learning_rate": 8.451409620975099e-05, "loss": 1.491, "step": 15721 }, { "epoch": 0.5630383010725732, "grad_norm": 1.4646804332733154, "learning_rate": 8.450263722482255e-05, "loss": 1.2206, "step": 15722 }, { "epoch": 0.5630741132021415, "grad_norm": 2.891911268234253, "learning_rate": 8.44911784483911e-05, "loss": 1.8076, "step": 15723 }, { "epoch": 0.5631099253317099, "grad_norm": 1.5828810930252075, "learning_rate": 8.447971988061088e-05, "loss": 1.6057, "step": 15724 }, { "epoch": 0.5631457374612782, "grad_norm": 1.6931911706924438, "learning_rate": 8.446826152163598e-05, "loss": 1.6362, "step": 15725 }, { "epoch": 0.5631815495908464, "grad_norm": 1.713527798652649, "learning_rate": 8.44568033716206e-05, "loss": 1.251, "step": 15726 }, { "epoch": 0.5632173617204147, "grad_norm": 1.8473023176193237, "learning_rate": 8.444534543071891e-05, "loss": 1.661, "step": 15727 }, { "epoch": 0.563253173849983, "grad_norm": 1.4170857667922974, "learning_rate": 8.443388769908498e-05, "loss": 1.5997, "step": 15728 }, { "epoch": 0.5632889859795512, "grad_norm": 2.1845030784606934, "learning_rate": 8.442243017687304e-05, "loss": 1.2984, "step": 15729 }, { "epoch": 0.5633247981091195, "grad_norm": 2.019676446914673, "learning_rate": 8.44109728642372e-05, "loss": 1.5803, "step": 15730 }, { "epoch": 0.5633606102386879, "grad_norm": 2.300466537475586, "learning_rate": 8.439951576133162e-05, "loss": 1.8922, "step": 15731 }, { "epoch": 0.5633964223682562, "grad_norm": 1.6747887134552002, "learning_rate": 8.438805886831042e-05, "loss": 1.4242, "step": 15732 }, { "epoch": 0.5634322344978244, "grad_norm": 1.5151009559631348, "learning_rate": 8.437660218532777e-05, "loss": 1.2717, "step": 15733 }, { "epoch": 0.5634680466273927, "grad_norm": 1.482073187828064, "learning_rate": 8.436514571253775e-05, "loss": 1.4709, "step": 15734 }, { "epoch": 0.563503858756961, "grad_norm": 1.9614146947860718, "learning_rate": 8.435368945009456e-05, "loss": 1.7183, "step": 15735 }, { "epoch": 0.5635396708865292, "grad_norm": 1.8879051208496094, "learning_rate": 8.434223339815229e-05, "loss": 1.5138, "step": 15736 }, { "epoch": 0.5635754830160975, "grad_norm": 1.917839527130127, "learning_rate": 8.433077755686506e-05, "loss": 1.5215, "step": 15737 }, { "epoch": 0.5636112951456659, "grad_norm": 1.6038079261779785, "learning_rate": 8.431932192638703e-05, "loss": 1.4635, "step": 15738 }, { "epoch": 0.5636471072752341, "grad_norm": 1.5450505018234253, "learning_rate": 8.430786650687227e-05, "loss": 1.6093, "step": 15739 }, { "epoch": 0.5636829194048024, "grad_norm": 1.6022473573684692, "learning_rate": 8.429641129847494e-05, "loss": 1.2194, "step": 15740 }, { "epoch": 0.5637187315343707, "grad_norm": 2.185281991958618, "learning_rate": 8.428495630134912e-05, "loss": 1.5651, "step": 15741 }, { "epoch": 0.563754543663939, "grad_norm": 1.5093828439712524, "learning_rate": 8.427350151564897e-05, "loss": 1.4803, "step": 15742 }, { "epoch": 0.5637903557935072, "grad_norm": 1.753310203552246, "learning_rate": 8.426204694152855e-05, "loss": 1.3101, "step": 15743 }, { "epoch": 0.5638261679230755, "grad_norm": 1.8272547721862793, "learning_rate": 8.425059257914201e-05, "loss": 1.466, "step": 15744 }, { "epoch": 0.5638619800526439, "grad_norm": 1.5649486780166626, "learning_rate": 8.423913842864342e-05, "loss": 1.433, "step": 15745 }, { "epoch": 0.5638977921822121, "grad_norm": 1.796422004699707, "learning_rate": 8.422768449018688e-05, "loss": 1.2773, "step": 15746 }, { "epoch": 0.5639336043117804, "grad_norm": 1.6080162525177002, "learning_rate": 8.421623076392652e-05, "loss": 1.522, "step": 15747 }, { "epoch": 0.5639694164413487, "grad_norm": 1.624840259552002, "learning_rate": 8.420477725001639e-05, "loss": 1.7583, "step": 15748 }, { "epoch": 0.564005228570917, "grad_norm": 1.7605829238891602, "learning_rate": 8.419332394861064e-05, "loss": 1.283, "step": 15749 }, { "epoch": 0.5640410407004852, "grad_norm": 1.488240361213684, "learning_rate": 8.418187085986329e-05, "loss": 1.3897, "step": 15750 }, { "epoch": 0.5640768528300535, "grad_norm": 1.4900915622711182, "learning_rate": 8.41704179839285e-05, "loss": 1.5596, "step": 15751 }, { "epoch": 0.5641126649596219, "grad_norm": 1.8292967081069946, "learning_rate": 8.415896532096034e-05, "loss": 1.4501, "step": 15752 }, { "epoch": 0.5641484770891901, "grad_norm": 1.3246054649353027, "learning_rate": 8.41475128711128e-05, "loss": 1.0903, "step": 15753 }, { "epoch": 0.5641842892187584, "grad_norm": 1.4729738235473633, "learning_rate": 8.413606063454008e-05, "loss": 1.3507, "step": 15754 }, { "epoch": 0.5642201013483267, "grad_norm": 1.7757606506347656, "learning_rate": 8.412460861139615e-05, "loss": 1.708, "step": 15755 }, { "epoch": 0.5642559134778949, "grad_norm": 3.3102619647979736, "learning_rate": 8.411315680183517e-05, "loss": 1.4701, "step": 15756 }, { "epoch": 0.5642917256074632, "grad_norm": 1.4510157108306885, "learning_rate": 8.410170520601115e-05, "loss": 1.3559, "step": 15757 }, { "epoch": 0.5643275377370315, "grad_norm": 1.7538570165634155, "learning_rate": 8.40902538240782e-05, "loss": 1.2939, "step": 15758 }, { "epoch": 0.5643633498665999, "grad_norm": 1.3784433603286743, "learning_rate": 8.407880265619035e-05, "loss": 1.5924, "step": 15759 }, { "epoch": 0.5643991619961681, "grad_norm": 2.172971248626709, "learning_rate": 8.406735170250168e-05, "loss": 1.3409, "step": 15760 }, { "epoch": 0.5644349741257364, "grad_norm": 2.134845733642578, "learning_rate": 8.405590096316626e-05, "loss": 1.2721, "step": 15761 }, { "epoch": 0.5644707862553047, "grad_norm": 1.4905674457550049, "learning_rate": 8.404445043833809e-05, "loss": 1.4646, "step": 15762 }, { "epoch": 0.5645065983848729, "grad_norm": 1.4775800704956055, "learning_rate": 8.40330001281713e-05, "loss": 1.5261, "step": 15763 }, { "epoch": 0.5645424105144412, "grad_norm": 1.478419303894043, "learning_rate": 8.402155003281984e-05, "loss": 1.569, "step": 15764 }, { "epoch": 0.5645782226440095, "grad_norm": 1.6768771409988403, "learning_rate": 8.401010015243787e-05, "loss": 1.6972, "step": 15765 }, { "epoch": 0.5646140347735779, "grad_norm": 1.3239349126815796, "learning_rate": 8.399865048717932e-05, "loss": 1.4826, "step": 15766 }, { "epoch": 0.5646498469031461, "grad_norm": 2.080510377883911, "learning_rate": 8.398720103719836e-05, "loss": 1.6596, "step": 15767 }, { "epoch": 0.5646856590327144, "grad_norm": 1.5443332195281982, "learning_rate": 8.397575180264887e-05, "loss": 1.2538, "step": 15768 }, { "epoch": 0.5647214711622827, "grad_norm": 1.3316525220870972, "learning_rate": 8.396430278368503e-05, "loss": 1.3754, "step": 15769 }, { "epoch": 0.5647572832918509, "grad_norm": 2.1287786960601807, "learning_rate": 8.395285398046084e-05, "loss": 1.6287, "step": 15770 }, { "epoch": 0.5647930954214192, "grad_norm": 1.7575713396072388, "learning_rate": 8.394140539313021e-05, "loss": 1.4228, "step": 15771 }, { "epoch": 0.5648289075509875, "grad_norm": 1.9802348613739014, "learning_rate": 8.392995702184734e-05, "loss": 1.5655, "step": 15772 }, { "epoch": 0.5648647196805558, "grad_norm": 1.6376569271087646, "learning_rate": 8.391850886676609e-05, "loss": 0.9234, "step": 15773 }, { "epoch": 0.5649005318101241, "grad_norm": 1.4419960975646973, "learning_rate": 8.390706092804064e-05, "loss": 1.4967, "step": 15774 }, { "epoch": 0.5649363439396924, "grad_norm": 1.8754823207855225, "learning_rate": 8.389561320582486e-05, "loss": 1.3997, "step": 15775 }, { "epoch": 0.5649721560692607, "grad_norm": 1.3436617851257324, "learning_rate": 8.388416570027289e-05, "loss": 1.5209, "step": 15776 }, { "epoch": 0.5650079681988289, "grad_norm": 1.7056835889816284, "learning_rate": 8.38727184115386e-05, "loss": 1.1724, "step": 15777 }, { "epoch": 0.5650437803283972, "grad_norm": 1.2598458528518677, "learning_rate": 8.386127133977617e-05, "loss": 1.352, "step": 15778 }, { "epoch": 0.5650795924579655, "grad_norm": 1.2932641506195068, "learning_rate": 8.384982448513949e-05, "loss": 1.3726, "step": 15779 }, { "epoch": 0.5651154045875338, "grad_norm": 1.6122196912765503, "learning_rate": 8.383837784778257e-05, "loss": 1.218, "step": 15780 }, { "epoch": 0.5651512167171021, "grad_norm": 1.5172953605651855, "learning_rate": 8.382693142785945e-05, "loss": 1.5963, "step": 15781 }, { "epoch": 0.5651870288466704, "grad_norm": 1.681890606880188, "learning_rate": 8.381548522552406e-05, "loss": 1.5023, "step": 15782 }, { "epoch": 0.5652228409762386, "grad_norm": 2.0911543369293213, "learning_rate": 8.38040392409305e-05, "loss": 1.6502, "step": 15783 }, { "epoch": 0.5652586531058069, "grad_norm": 2.043477773666382, "learning_rate": 8.379259347423265e-05, "loss": 1.4353, "step": 15784 }, { "epoch": 0.5652944652353752, "grad_norm": 1.461133360862732, "learning_rate": 8.37811479255846e-05, "loss": 1.5957, "step": 15785 }, { "epoch": 0.5653302773649435, "grad_norm": 1.580897331237793, "learning_rate": 8.376970259514023e-05, "loss": 1.1663, "step": 15786 }, { "epoch": 0.5653660894945118, "grad_norm": 1.5734902620315552, "learning_rate": 8.375825748305364e-05, "loss": 1.6092, "step": 15787 }, { "epoch": 0.5654019016240801, "grad_norm": 1.7830482721328735, "learning_rate": 8.37468125894787e-05, "loss": 1.5706, "step": 15788 }, { "epoch": 0.5654377137536484, "grad_norm": 2.396804094314575, "learning_rate": 8.373536791456944e-05, "loss": 1.6365, "step": 15789 }, { "epoch": 0.5654735258832166, "grad_norm": 1.630237340927124, "learning_rate": 8.372392345847983e-05, "loss": 1.693, "step": 15790 }, { "epoch": 0.5655093380127849, "grad_norm": 2.5660781860351562, "learning_rate": 8.371247922136383e-05, "loss": 1.2867, "step": 15791 }, { "epoch": 0.5655451501423532, "grad_norm": 1.5164788961410522, "learning_rate": 8.370103520337542e-05, "loss": 1.3451, "step": 15792 }, { "epoch": 0.5655809622719215, "grad_norm": 1.45986807346344, "learning_rate": 8.368959140466853e-05, "loss": 1.4041, "step": 15793 }, { "epoch": 0.5656167744014898, "grad_norm": 1.8049870729446411, "learning_rate": 8.367814782539718e-05, "loss": 1.7531, "step": 15794 }, { "epoch": 0.5656525865310581, "grad_norm": 2.0593514442443848, "learning_rate": 8.366670446571525e-05, "loss": 1.1339, "step": 15795 }, { "epoch": 0.5656883986606264, "grad_norm": 1.956852912902832, "learning_rate": 8.365526132577681e-05, "loss": 1.4611, "step": 15796 }, { "epoch": 0.5657242107901946, "grad_norm": 1.8688738346099854, "learning_rate": 8.364381840573573e-05, "loss": 1.3412, "step": 15797 }, { "epoch": 0.5657600229197629, "grad_norm": 1.790224313735962, "learning_rate": 8.363237570574595e-05, "loss": 1.1733, "step": 15798 }, { "epoch": 0.5657958350493312, "grad_norm": 1.8264939785003662, "learning_rate": 8.362093322596145e-05, "loss": 1.4683, "step": 15799 }, { "epoch": 0.5658316471788994, "grad_norm": 1.8965346813201904, "learning_rate": 8.360949096653616e-05, "loss": 1.797, "step": 15800 }, { "epoch": 0.5658674593084678, "grad_norm": 2.144782066345215, "learning_rate": 8.359804892762405e-05, "loss": 1.7707, "step": 15801 }, { "epoch": 0.5659032714380361, "grad_norm": 3.100724697113037, "learning_rate": 8.3586607109379e-05, "loss": 1.3111, "step": 15802 }, { "epoch": 0.5659390835676044, "grad_norm": 2.0397491455078125, "learning_rate": 8.357516551195501e-05, "loss": 1.4468, "step": 15803 }, { "epoch": 0.5659748956971726, "grad_norm": 1.3980594873428345, "learning_rate": 8.356372413550597e-05, "loss": 1.3259, "step": 15804 }, { "epoch": 0.5660107078267409, "grad_norm": 1.8807727098464966, "learning_rate": 8.355228298018582e-05, "loss": 1.7511, "step": 15805 }, { "epoch": 0.5660465199563092, "grad_norm": 2.131071090698242, "learning_rate": 8.354084204614851e-05, "loss": 1.2344, "step": 15806 }, { "epoch": 0.5660823320858774, "grad_norm": 1.6939911842346191, "learning_rate": 8.35294013335479e-05, "loss": 1.4542, "step": 15807 }, { "epoch": 0.5661181442154458, "grad_norm": 1.5454076528549194, "learning_rate": 8.351796084253797e-05, "loss": 1.4836, "step": 15808 }, { "epoch": 0.5661539563450141, "grad_norm": 1.7581214904785156, "learning_rate": 8.350652057327261e-05, "loss": 1.3919, "step": 15809 }, { "epoch": 0.5661897684745824, "grad_norm": 1.616428017616272, "learning_rate": 8.349508052590574e-05, "loss": 1.4052, "step": 15810 }, { "epoch": 0.5662255806041506, "grad_norm": 1.2832040786743164, "learning_rate": 8.348364070059127e-05, "loss": 1.1419, "step": 15811 }, { "epoch": 0.5662613927337189, "grad_norm": 1.918565034866333, "learning_rate": 8.347220109748312e-05, "loss": 1.5448, "step": 15812 }, { "epoch": 0.5662972048632872, "grad_norm": 2.462134838104248, "learning_rate": 8.346076171673518e-05, "loss": 1.3867, "step": 15813 }, { "epoch": 0.5663330169928554, "grad_norm": 1.435020923614502, "learning_rate": 8.344932255850136e-05, "loss": 1.2917, "step": 15814 }, { "epoch": 0.5663688291224238, "grad_norm": 1.4067466259002686, "learning_rate": 8.343788362293556e-05, "loss": 1.4601, "step": 15815 }, { "epoch": 0.5664046412519921, "grad_norm": 1.6416200399398804, "learning_rate": 8.342644491019165e-05, "loss": 1.3699, "step": 15816 }, { "epoch": 0.5664404533815603, "grad_norm": 1.66130793094635, "learning_rate": 8.341500642042359e-05, "loss": 1.3382, "step": 15817 }, { "epoch": 0.5664762655111286, "grad_norm": 1.7099733352661133, "learning_rate": 8.340356815378517e-05, "loss": 1.6387, "step": 15818 }, { "epoch": 0.5665120776406969, "grad_norm": 1.7127584218978882, "learning_rate": 8.339213011043038e-05, "loss": 1.3284, "step": 15819 }, { "epoch": 0.5665478897702652, "grad_norm": 1.6124918460845947, "learning_rate": 8.338069229051302e-05, "loss": 1.4032, "step": 15820 }, { "epoch": 0.5665837018998334, "grad_norm": 2.4808313846588135, "learning_rate": 8.336925469418704e-05, "loss": 1.5178, "step": 15821 }, { "epoch": 0.5666195140294018, "grad_norm": 1.6432664394378662, "learning_rate": 8.335781732160625e-05, "loss": 1.5208, "step": 15822 }, { "epoch": 0.5666553261589701, "grad_norm": 1.5675996541976929, "learning_rate": 8.334638017292459e-05, "loss": 1.5968, "step": 15823 }, { "epoch": 0.5666911382885383, "grad_norm": 2.1726925373077393, "learning_rate": 8.33349432482959e-05, "loss": 1.3455, "step": 15824 }, { "epoch": 0.5667269504181066, "grad_norm": 1.6445999145507812, "learning_rate": 8.332350654787404e-05, "loss": 1.6555, "step": 15825 }, { "epoch": 0.5667627625476749, "grad_norm": 1.3497015237808228, "learning_rate": 8.33120700718129e-05, "loss": 1.2937, "step": 15826 }, { "epoch": 0.5667985746772431, "grad_norm": 1.9332237243652344, "learning_rate": 8.330063382026631e-05, "loss": 1.7678, "step": 15827 }, { "epoch": 0.5668343868068114, "grad_norm": 2.0545902252197266, "learning_rate": 8.328919779338819e-05, "loss": 1.0707, "step": 15828 }, { "epoch": 0.5668701989363798, "grad_norm": 1.9223384857177734, "learning_rate": 8.327776199133232e-05, "loss": 1.0993, "step": 15829 }, { "epoch": 0.5669060110659481, "grad_norm": 1.645604133605957, "learning_rate": 8.326632641425261e-05, "loss": 1.661, "step": 15830 }, { "epoch": 0.5669418231955163, "grad_norm": 1.6659955978393555, "learning_rate": 8.325489106230288e-05, "loss": 1.2141, "step": 15831 }, { "epoch": 0.5669776353250846, "grad_norm": 1.6164517402648926, "learning_rate": 8.324345593563701e-05, "loss": 1.312, "step": 15832 }, { "epoch": 0.5670134474546529, "grad_norm": 2.98215913772583, "learning_rate": 8.323202103440884e-05, "loss": 1.5099, "step": 15833 }, { "epoch": 0.5670492595842211, "grad_norm": 1.6239739656448364, "learning_rate": 8.322058635877216e-05, "loss": 1.3561, "step": 15834 }, { "epoch": 0.5670850717137894, "grad_norm": 2.104954957962036, "learning_rate": 8.320915190888087e-05, "loss": 1.7427, "step": 15835 }, { "epoch": 0.5671208838433578, "grad_norm": 1.366011619567871, "learning_rate": 8.319771768488877e-05, "loss": 1.4357, "step": 15836 }, { "epoch": 0.5671566959729261, "grad_norm": 1.9174448251724243, "learning_rate": 8.318628368694972e-05, "loss": 1.3326, "step": 15837 }, { "epoch": 0.5671925081024943, "grad_norm": 1.4196544885635376, "learning_rate": 8.317484991521751e-05, "loss": 1.4263, "step": 15838 }, { "epoch": 0.5672283202320626, "grad_norm": 1.5104621648788452, "learning_rate": 8.316341636984602e-05, "loss": 1.535, "step": 15839 }, { "epoch": 0.5672641323616309, "grad_norm": 1.5836697816848755, "learning_rate": 8.315198305098902e-05, "loss": 1.2274, "step": 15840 }, { "epoch": 0.5672999444911991, "grad_norm": 2.0471384525299072, "learning_rate": 8.314054995880036e-05, "loss": 1.5095, "step": 15841 }, { "epoch": 0.5673357566207674, "grad_norm": 1.560638427734375, "learning_rate": 8.312911709343388e-05, "loss": 1.417, "step": 15842 }, { "epoch": 0.5673715687503358, "grad_norm": 1.8646968603134155, "learning_rate": 8.311768445504333e-05, "loss": 1.5827, "step": 15843 }, { "epoch": 0.567407380879904, "grad_norm": 1.4655557870864868, "learning_rate": 8.31062520437826e-05, "loss": 1.2498, "step": 15844 }, { "epoch": 0.5674431930094723, "grad_norm": 2.248730421066284, "learning_rate": 8.309481985980541e-05, "loss": 1.4068, "step": 15845 }, { "epoch": 0.5674790051390406, "grad_norm": 2.3204147815704346, "learning_rate": 8.308338790326565e-05, "loss": 1.7721, "step": 15846 }, { "epoch": 0.5675148172686089, "grad_norm": 1.9696449041366577, "learning_rate": 8.307195617431707e-05, "loss": 1.5491, "step": 15847 }, { "epoch": 0.5675506293981771, "grad_norm": 1.883335828781128, "learning_rate": 8.306052467311349e-05, "loss": 1.4826, "step": 15848 }, { "epoch": 0.5675864415277454, "grad_norm": 2.11018705368042, "learning_rate": 8.304909339980873e-05, "loss": 1.7215, "step": 15849 }, { "epoch": 0.5676222536573138, "grad_norm": 1.548668384552002, "learning_rate": 8.303766235455648e-05, "loss": 1.3243, "step": 15850 }, { "epoch": 0.567658065786882, "grad_norm": 1.4991328716278076, "learning_rate": 8.302623153751068e-05, "loss": 1.4878, "step": 15851 }, { "epoch": 0.5676938779164503, "grad_norm": 1.4898539781570435, "learning_rate": 8.301480094882497e-05, "loss": 1.2673, "step": 15852 }, { "epoch": 0.5677296900460186, "grad_norm": 1.7317248582839966, "learning_rate": 8.300337058865323e-05, "loss": 1.4307, "step": 15853 }, { "epoch": 0.5677655021755869, "grad_norm": 1.6654188632965088, "learning_rate": 8.299194045714921e-05, "loss": 1.5343, "step": 15854 }, { "epoch": 0.5678013143051551, "grad_norm": 1.7130259275436401, "learning_rate": 8.298051055446673e-05, "loss": 1.544, "step": 15855 }, { "epoch": 0.5678371264347234, "grad_norm": 3.522223472595215, "learning_rate": 8.296908088075949e-05, "loss": 1.9023, "step": 15856 }, { "epoch": 0.5678729385642918, "grad_norm": 2.1206347942352295, "learning_rate": 8.295765143618131e-05, "loss": 1.6537, "step": 15857 }, { "epoch": 0.56790875069386, "grad_norm": 2.9285621643066406, "learning_rate": 8.294622222088598e-05, "loss": 1.6514, "step": 15858 }, { "epoch": 0.5679445628234283, "grad_norm": 2.09845232963562, "learning_rate": 8.293479323502716e-05, "loss": 1.3896, "step": 15859 }, { "epoch": 0.5679803749529966, "grad_norm": 1.8847618103027344, "learning_rate": 8.292336447875876e-05, "loss": 1.6824, "step": 15860 }, { "epoch": 0.5680161870825648, "grad_norm": 3.045405149459839, "learning_rate": 8.291193595223438e-05, "loss": 1.5033, "step": 15861 }, { "epoch": 0.5680519992121331, "grad_norm": 1.4553380012512207, "learning_rate": 8.290050765560795e-05, "loss": 1.4361, "step": 15862 }, { "epoch": 0.5680878113417014, "grad_norm": 1.3813594579696655, "learning_rate": 8.288907958903305e-05, "loss": 1.7692, "step": 15863 }, { "epoch": 0.5681236234712698, "grad_norm": 1.9366446733474731, "learning_rate": 8.287765175266358e-05, "loss": 1.3047, "step": 15864 }, { "epoch": 0.568159435600838, "grad_norm": 2.792715549468994, "learning_rate": 8.286622414665317e-05, "loss": 2.0675, "step": 15865 }, { "epoch": 0.5681952477304063, "grad_norm": 1.838062047958374, "learning_rate": 8.285479677115563e-05, "loss": 1.4573, "step": 15866 }, { "epoch": 0.5682310598599746, "grad_norm": 1.3714712858200073, "learning_rate": 8.284336962632473e-05, "loss": 1.4702, "step": 15867 }, { "epoch": 0.5682668719895428, "grad_norm": 1.2245217561721802, "learning_rate": 8.283194271231408e-05, "loss": 1.5559, "step": 15868 }, { "epoch": 0.5683026841191111, "grad_norm": 1.866948127746582, "learning_rate": 8.282051602927757e-05, "loss": 1.5925, "step": 15869 }, { "epoch": 0.5683384962486794, "grad_norm": 1.918073058128357, "learning_rate": 8.28090895773688e-05, "loss": 1.2218, "step": 15870 }, { "epoch": 0.5683743083782478, "grad_norm": 2.7581140995025635, "learning_rate": 8.27976633567416e-05, "loss": 1.1811, "step": 15871 }, { "epoch": 0.568410120507816, "grad_norm": 1.6794708967208862, "learning_rate": 8.27862373675496e-05, "loss": 1.3733, "step": 15872 }, { "epoch": 0.5684459326373843, "grad_norm": 2.179267644882202, "learning_rate": 8.277481160994663e-05, "loss": 1.3167, "step": 15873 }, { "epoch": 0.5684817447669526, "grad_norm": 1.5156314373016357, "learning_rate": 8.276338608408627e-05, "loss": 1.2546, "step": 15874 }, { "epoch": 0.5685175568965208, "grad_norm": 1.3936090469360352, "learning_rate": 8.27519607901224e-05, "loss": 1.2427, "step": 15875 }, { "epoch": 0.5685533690260891, "grad_norm": 2.1718878746032715, "learning_rate": 8.274053572820862e-05, "loss": 1.238, "step": 15876 }, { "epoch": 0.5685891811556574, "grad_norm": 1.6128958463668823, "learning_rate": 8.272911089849866e-05, "loss": 1.6238, "step": 15877 }, { "epoch": 0.5686249932852258, "grad_norm": 1.745404601097107, "learning_rate": 8.271768630114624e-05, "loss": 1.3826, "step": 15878 }, { "epoch": 0.568660805414794, "grad_norm": 1.595452070236206, "learning_rate": 8.270626193630503e-05, "loss": 1.4595, "step": 15879 }, { "epoch": 0.5686966175443623, "grad_norm": 1.5459754467010498, "learning_rate": 8.269483780412883e-05, "loss": 1.2893, "step": 15880 }, { "epoch": 0.5687324296739306, "grad_norm": 1.7879676818847656, "learning_rate": 8.268341390477118e-05, "loss": 1.8261, "step": 15881 }, { "epoch": 0.5687682418034988, "grad_norm": 1.4515905380249023, "learning_rate": 8.267199023838593e-05, "loss": 1.7552, "step": 15882 }, { "epoch": 0.5688040539330671, "grad_norm": 1.9007455110549927, "learning_rate": 8.266056680512664e-05, "loss": 1.4626, "step": 15883 }, { "epoch": 0.5688398660626354, "grad_norm": 1.7271119356155396, "learning_rate": 8.26491436051471e-05, "loss": 1.5642, "step": 15884 }, { "epoch": 0.5688756781922037, "grad_norm": 1.5597244501113892, "learning_rate": 8.263772063860096e-05, "loss": 1.3979, "step": 15885 }, { "epoch": 0.568911490321772, "grad_norm": 1.8739885091781616, "learning_rate": 8.262629790564186e-05, "loss": 1.6296, "step": 15886 }, { "epoch": 0.5689473024513403, "grad_norm": 1.602601408958435, "learning_rate": 8.261487540642353e-05, "loss": 1.619, "step": 15887 }, { "epoch": 0.5689831145809086, "grad_norm": 1.4145638942718506, "learning_rate": 8.26034531410996e-05, "loss": 1.5039, "step": 15888 }, { "epoch": 0.5690189267104768, "grad_norm": 1.6475313901901245, "learning_rate": 8.259203110982381e-05, "loss": 1.2161, "step": 15889 }, { "epoch": 0.5690547388400451, "grad_norm": 1.7352582216262817, "learning_rate": 8.258060931274976e-05, "loss": 1.41, "step": 15890 }, { "epoch": 0.5690905509696134, "grad_norm": 1.9929988384246826, "learning_rate": 8.256918775003115e-05, "loss": 1.3584, "step": 15891 }, { "epoch": 0.5691263630991817, "grad_norm": 1.258603811264038, "learning_rate": 8.255776642182159e-05, "loss": 1.276, "step": 15892 }, { "epoch": 0.56916217522875, "grad_norm": 1.3469874858856201, "learning_rate": 8.254634532827487e-05, "loss": 1.661, "step": 15893 }, { "epoch": 0.5691979873583183, "grad_norm": 1.4749658107757568, "learning_rate": 8.253492446954452e-05, "loss": 1.7263, "step": 15894 }, { "epoch": 0.5692337994878865, "grad_norm": 1.9066383838653564, "learning_rate": 8.252350384578421e-05, "loss": 1.4895, "step": 15895 }, { "epoch": 0.5692696116174548, "grad_norm": 2.15698504447937, "learning_rate": 8.251208345714764e-05, "loss": 1.444, "step": 15896 }, { "epoch": 0.5693054237470231, "grad_norm": 1.5320926904678345, "learning_rate": 8.25006633037884e-05, "loss": 1.5376, "step": 15897 }, { "epoch": 0.5693412358765914, "grad_norm": 1.3792502880096436, "learning_rate": 8.24892433858602e-05, "loss": 1.6099, "step": 15898 }, { "epoch": 0.5693770480061597, "grad_norm": 1.6922613382339478, "learning_rate": 8.247782370351663e-05, "loss": 1.3959, "step": 15899 }, { "epoch": 0.569412860135728, "grad_norm": 2.027216911315918, "learning_rate": 8.246640425691133e-05, "loss": 1.516, "step": 15900 }, { "epoch": 0.5694486722652963, "grad_norm": 1.6750237941741943, "learning_rate": 8.245498504619794e-05, "loss": 1.1921, "step": 15901 }, { "epoch": 0.5694844843948645, "grad_norm": 1.6753225326538086, "learning_rate": 8.244356607153011e-05, "loss": 1.5097, "step": 15902 }, { "epoch": 0.5695202965244328, "grad_norm": 1.3187848329544067, "learning_rate": 8.243214733306145e-05, "loss": 1.4227, "step": 15903 }, { "epoch": 0.5695561086540011, "grad_norm": 1.5426853895187378, "learning_rate": 8.242072883094559e-05, "loss": 1.5042, "step": 15904 }, { "epoch": 0.5695919207835693, "grad_norm": 1.53534996509552, "learning_rate": 8.240931056533615e-05, "loss": 1.2341, "step": 15905 }, { "epoch": 0.5696277329131377, "grad_norm": 1.2395790815353394, "learning_rate": 8.239789253638672e-05, "loss": 1.2779, "step": 15906 }, { "epoch": 0.569663545042706, "grad_norm": 1.5523747205734253, "learning_rate": 8.238647474425097e-05, "loss": 1.6073, "step": 15907 }, { "epoch": 0.5696993571722743, "grad_norm": 1.5684142112731934, "learning_rate": 8.237505718908246e-05, "loss": 1.5768, "step": 15908 }, { "epoch": 0.5697351693018425, "grad_norm": 1.7088567018508911, "learning_rate": 8.236363987103483e-05, "loss": 1.4598, "step": 15909 }, { "epoch": 0.5697709814314108, "grad_norm": 1.5773688554763794, "learning_rate": 8.235222279026168e-05, "loss": 1.2386, "step": 15910 }, { "epoch": 0.5698067935609791, "grad_norm": 1.7507705688476562, "learning_rate": 8.234080594691663e-05, "loss": 1.5207, "step": 15911 }, { "epoch": 0.5698426056905473, "grad_norm": 1.7203186750411987, "learning_rate": 8.232938934115323e-05, "loss": 1.472, "step": 15912 }, { "epoch": 0.5698784178201157, "grad_norm": 1.6175166368484497, "learning_rate": 8.231797297312509e-05, "loss": 1.6451, "step": 15913 }, { "epoch": 0.569914229949684, "grad_norm": 1.395760416984558, "learning_rate": 8.230655684298585e-05, "loss": 1.4501, "step": 15914 }, { "epoch": 0.5699500420792523, "grad_norm": 1.8864084482192993, "learning_rate": 8.229514095088903e-05, "loss": 1.4636, "step": 15915 }, { "epoch": 0.5699858542088205, "grad_norm": 1.705157995223999, "learning_rate": 8.228372529698828e-05, "loss": 1.5955, "step": 15916 }, { "epoch": 0.5700216663383888, "grad_norm": 2.0784878730773926, "learning_rate": 8.227230988143712e-05, "loss": 1.3895, "step": 15917 }, { "epoch": 0.5700574784679571, "grad_norm": 1.4656957387924194, "learning_rate": 8.22608947043892e-05, "loss": 1.4649, "step": 15918 }, { "epoch": 0.5700932905975253, "grad_norm": 1.8664727210998535, "learning_rate": 8.224947976599804e-05, "loss": 1.6218, "step": 15919 }, { "epoch": 0.5701291027270937, "grad_norm": 1.4809681177139282, "learning_rate": 8.223806506641724e-05, "loss": 1.4457, "step": 15920 }, { "epoch": 0.570164914856662, "grad_norm": 1.9111181497573853, "learning_rate": 8.222665060580038e-05, "loss": 1.413, "step": 15921 }, { "epoch": 0.5702007269862303, "grad_norm": 1.56305992603302, "learning_rate": 8.221523638430098e-05, "loss": 1.2293, "step": 15922 }, { "epoch": 0.5702365391157985, "grad_norm": 1.4683891534805298, "learning_rate": 8.220382240207266e-05, "loss": 1.5771, "step": 15923 }, { "epoch": 0.5702723512453668, "grad_norm": 1.7615280151367188, "learning_rate": 8.219240865926892e-05, "loss": 1.4823, "step": 15924 }, { "epoch": 0.5703081633749351, "grad_norm": 2.0023019313812256, "learning_rate": 8.218099515604339e-05, "loss": 1.4878, "step": 15925 }, { "epoch": 0.5703439755045033, "grad_norm": 1.7098430395126343, "learning_rate": 8.216958189254956e-05, "loss": 1.6951, "step": 15926 }, { "epoch": 0.5703797876340717, "grad_norm": 1.6771968603134155, "learning_rate": 8.215816886894102e-05, "loss": 1.3032, "step": 15927 }, { "epoch": 0.57041559976364, "grad_norm": 1.426419973373413, "learning_rate": 8.214675608537128e-05, "loss": 1.1594, "step": 15928 }, { "epoch": 0.5704514118932082, "grad_norm": 2.378345012664795, "learning_rate": 8.213534354199392e-05, "loss": 1.9314, "step": 15929 }, { "epoch": 0.5704872240227765, "grad_norm": 1.3041222095489502, "learning_rate": 8.212393123896249e-05, "loss": 1.5663, "step": 15930 }, { "epoch": 0.5705230361523448, "grad_norm": 1.2427996397018433, "learning_rate": 8.211251917643047e-05, "loss": 1.4987, "step": 15931 }, { "epoch": 0.5705588482819131, "grad_norm": 1.7306779623031616, "learning_rate": 8.210110735455147e-05, "loss": 1.4224, "step": 15932 }, { "epoch": 0.5705946604114813, "grad_norm": 1.6303404569625854, "learning_rate": 8.208969577347894e-05, "loss": 1.443, "step": 15933 }, { "epoch": 0.5706304725410497, "grad_norm": 1.9509263038635254, "learning_rate": 8.207828443336649e-05, "loss": 1.3303, "step": 15934 }, { "epoch": 0.570666284670618, "grad_norm": 2.3029608726501465, "learning_rate": 8.206687333436758e-05, "loss": 1.4231, "step": 15935 }, { "epoch": 0.5707020968001862, "grad_norm": 1.5229395627975464, "learning_rate": 8.205546247663578e-05, "loss": 1.52, "step": 15936 }, { "epoch": 0.5707379089297545, "grad_norm": 2.0228278636932373, "learning_rate": 8.204405186032455e-05, "loss": 1.7603, "step": 15937 }, { "epoch": 0.5707737210593228, "grad_norm": 1.6591273546218872, "learning_rate": 8.203264148558749e-05, "loss": 1.3386, "step": 15938 }, { "epoch": 0.570809533188891, "grad_norm": 1.5935134887695312, "learning_rate": 8.202123135257804e-05, "loss": 1.514, "step": 15939 }, { "epoch": 0.5708453453184593, "grad_norm": 1.514012098312378, "learning_rate": 8.20098214614497e-05, "loss": 1.6337, "step": 15940 }, { "epoch": 0.5708811574480277, "grad_norm": 1.718827724456787, "learning_rate": 8.199841181235606e-05, "loss": 1.4344, "step": 15941 }, { "epoch": 0.570916969577596, "grad_norm": 2.1700141429901123, "learning_rate": 8.198700240545053e-05, "loss": 1.3989, "step": 15942 }, { "epoch": 0.5709527817071642, "grad_norm": 1.856092929840088, "learning_rate": 8.197559324088666e-05, "loss": 1.6977, "step": 15943 }, { "epoch": 0.5709885938367325, "grad_norm": 1.462905764579773, "learning_rate": 8.196418431881793e-05, "loss": 1.6105, "step": 15944 }, { "epoch": 0.5710244059663008, "grad_norm": 2.0627965927124023, "learning_rate": 8.195277563939785e-05, "loss": 1.6915, "step": 15945 }, { "epoch": 0.571060218095869, "grad_norm": 1.770424246788025, "learning_rate": 8.194136720277992e-05, "loss": 1.4564, "step": 15946 }, { "epoch": 0.5710960302254373, "grad_norm": 3.049556255340576, "learning_rate": 8.192995900911751e-05, "loss": 1.3256, "step": 15947 }, { "epoch": 0.5711318423550057, "grad_norm": 1.7785724401474, "learning_rate": 8.191855105856428e-05, "loss": 1.4055, "step": 15948 }, { "epoch": 0.571167654484574, "grad_norm": 1.4011263847351074, "learning_rate": 8.190714335127356e-05, "loss": 1.401, "step": 15949 }, { "epoch": 0.5712034666141422, "grad_norm": 1.9322203397750854, "learning_rate": 8.189573588739892e-05, "loss": 1.5981, "step": 15950 }, { "epoch": 0.5712392787437105, "grad_norm": 2.2392494678497314, "learning_rate": 8.188432866709379e-05, "loss": 1.5755, "step": 15951 }, { "epoch": 0.5712750908732788, "grad_norm": 1.8817596435546875, "learning_rate": 8.187292169051168e-05, "loss": 1.4319, "step": 15952 }, { "epoch": 0.571310903002847, "grad_norm": 1.4532921314239502, "learning_rate": 8.186151495780598e-05, "loss": 1.2006, "step": 15953 }, { "epoch": 0.5713467151324153, "grad_norm": 2.218801975250244, "learning_rate": 8.185010846913024e-05, "loss": 1.4041, "step": 15954 }, { "epoch": 0.5713825272619837, "grad_norm": 1.6853936910629272, "learning_rate": 8.183870222463789e-05, "loss": 1.5148, "step": 15955 }, { "epoch": 0.571418339391552, "grad_norm": 1.7166179418563843, "learning_rate": 8.182729622448231e-05, "loss": 1.6018, "step": 15956 }, { "epoch": 0.5714541515211202, "grad_norm": 1.7702536582946777, "learning_rate": 8.181589046881709e-05, "loss": 1.4854, "step": 15957 }, { "epoch": 0.5714899636506885, "grad_norm": 1.2718796730041504, "learning_rate": 8.180448495779554e-05, "loss": 1.5774, "step": 15958 }, { "epoch": 0.5715257757802568, "grad_norm": 1.6079905033111572, "learning_rate": 8.179307969157123e-05, "loss": 1.2911, "step": 15959 }, { "epoch": 0.571561587909825, "grad_norm": 1.5168037414550781, "learning_rate": 8.17816746702975e-05, "loss": 1.6445, "step": 15960 }, { "epoch": 0.5715974000393933, "grad_norm": 1.877971887588501, "learning_rate": 8.177026989412789e-05, "loss": 1.2533, "step": 15961 }, { "epoch": 0.5716332121689617, "grad_norm": 1.4898743629455566, "learning_rate": 8.175886536321574e-05, "loss": 1.2705, "step": 15962 }, { "epoch": 0.57166902429853, "grad_norm": 1.495103359222412, "learning_rate": 8.174746107771454e-05, "loss": 1.6275, "step": 15963 }, { "epoch": 0.5717048364280982, "grad_norm": 1.7005457878112793, "learning_rate": 8.173605703777774e-05, "loss": 1.5007, "step": 15964 }, { "epoch": 0.5717406485576665, "grad_norm": 1.619920253753662, "learning_rate": 8.172465324355868e-05, "loss": 1.3175, "step": 15965 }, { "epoch": 0.5717764606872348, "grad_norm": 1.668201208114624, "learning_rate": 8.171324969521089e-05, "loss": 1.1773, "step": 15966 }, { "epoch": 0.571812272816803, "grad_norm": 1.5060385465621948, "learning_rate": 8.170184639288767e-05, "loss": 1.5491, "step": 15967 }, { "epoch": 0.5718480849463713, "grad_norm": 1.9399768114089966, "learning_rate": 8.169044333674259e-05, "loss": 1.7142, "step": 15968 }, { "epoch": 0.5718838970759397, "grad_norm": 1.6110990047454834, "learning_rate": 8.167904052692889e-05, "loss": 1.3316, "step": 15969 }, { "epoch": 0.5719197092055079, "grad_norm": 1.6544129848480225, "learning_rate": 8.166763796360014e-05, "loss": 1.2218, "step": 15970 }, { "epoch": 0.5719555213350762, "grad_norm": 1.499854564666748, "learning_rate": 8.165623564690961e-05, "loss": 1.5932, "step": 15971 }, { "epoch": 0.5719913334646445, "grad_norm": 1.706298589706421, "learning_rate": 8.164483357701082e-05, "loss": 1.4479, "step": 15972 }, { "epoch": 0.5720271455942127, "grad_norm": 1.6135319471359253, "learning_rate": 8.163343175405712e-05, "loss": 1.3372, "step": 15973 }, { "epoch": 0.572062957723781, "grad_norm": 1.8496217727661133, "learning_rate": 8.162203017820186e-05, "loss": 1.4587, "step": 15974 }, { "epoch": 0.5720987698533493, "grad_norm": 1.8537300825119019, "learning_rate": 8.161062884959852e-05, "loss": 1.5282, "step": 15975 }, { "epoch": 0.5721345819829177, "grad_norm": 1.6979246139526367, "learning_rate": 8.159922776840039e-05, "loss": 1.4608, "step": 15976 }, { "epoch": 0.5721703941124859, "grad_norm": 1.7887334823608398, "learning_rate": 8.158782693476099e-05, "loss": 1.6018, "step": 15977 }, { "epoch": 0.5722062062420542, "grad_norm": 1.8376210927963257, "learning_rate": 8.157642634883355e-05, "loss": 1.3104, "step": 15978 }, { "epoch": 0.5722420183716225, "grad_norm": 1.831979751586914, "learning_rate": 8.156502601077159e-05, "loss": 1.363, "step": 15979 }, { "epoch": 0.5722778305011907, "grad_norm": 1.3710800409317017, "learning_rate": 8.155362592072837e-05, "loss": 1.4114, "step": 15980 }, { "epoch": 0.572313642630759, "grad_norm": 1.5481157302856445, "learning_rate": 8.15422260788574e-05, "loss": 1.4933, "step": 15981 }, { "epoch": 0.5723494547603273, "grad_norm": 1.3802767992019653, "learning_rate": 8.153082648531192e-05, "loss": 1.6345, "step": 15982 }, { "epoch": 0.5723852668898957, "grad_norm": 1.3365625143051147, "learning_rate": 8.151942714024534e-05, "loss": 1.3163, "step": 15983 }, { "epoch": 0.5724210790194639, "grad_norm": 1.9832862615585327, "learning_rate": 8.150802804381105e-05, "loss": 1.4807, "step": 15984 }, { "epoch": 0.5724568911490322, "grad_norm": 2.2987887859344482, "learning_rate": 8.149662919616238e-05, "loss": 1.6213, "step": 15985 }, { "epoch": 0.5724927032786005, "grad_norm": 1.9295026063919067, "learning_rate": 8.14852305974527e-05, "loss": 1.5155, "step": 15986 }, { "epoch": 0.5725285154081687, "grad_norm": 2.3797919750213623, "learning_rate": 8.147383224783534e-05, "loss": 1.4002, "step": 15987 }, { "epoch": 0.572564327537737, "grad_norm": 1.760378360748291, "learning_rate": 8.146243414746371e-05, "loss": 1.4426, "step": 15988 }, { "epoch": 0.5726001396673053, "grad_norm": 1.804693579673767, "learning_rate": 8.145103629649104e-05, "loss": 1.3964, "step": 15989 }, { "epoch": 0.5726359517968737, "grad_norm": 2.0817153453826904, "learning_rate": 8.143963869507085e-05, "loss": 1.2892, "step": 15990 }, { "epoch": 0.5726717639264419, "grad_norm": 2.2405829429626465, "learning_rate": 8.142824134335633e-05, "loss": 1.5307, "step": 15991 }, { "epoch": 0.5727075760560102, "grad_norm": 1.2104196548461914, "learning_rate": 8.141684424150087e-05, "loss": 1.6489, "step": 15992 }, { "epoch": 0.5727433881855785, "grad_norm": 1.529676079750061, "learning_rate": 8.14054473896578e-05, "loss": 1.5803, "step": 15993 }, { "epoch": 0.5727792003151467, "grad_norm": 1.550414800643921, "learning_rate": 8.139405078798044e-05, "loss": 1.6136, "step": 15994 }, { "epoch": 0.572815012444715, "grad_norm": 1.7423759698867798, "learning_rate": 8.138265443662215e-05, "loss": 1.6426, "step": 15995 }, { "epoch": 0.5728508245742833, "grad_norm": 1.7016830444335938, "learning_rate": 8.137125833573622e-05, "loss": 1.4305, "step": 15996 }, { "epoch": 0.5728866367038516, "grad_norm": 2.0163040161132812, "learning_rate": 8.135986248547597e-05, "loss": 1.7001, "step": 15997 }, { "epoch": 0.5729224488334199, "grad_norm": 1.8785911798477173, "learning_rate": 8.134846688599473e-05, "loss": 1.4775, "step": 15998 }, { "epoch": 0.5729582609629882, "grad_norm": 1.5626881122589111, "learning_rate": 8.133707153744582e-05, "loss": 1.2578, "step": 15999 }, { "epoch": 0.5729940730925565, "grad_norm": 1.6098331212997437, "learning_rate": 8.132567643998254e-05, "loss": 1.4393, "step": 16000 }, { "epoch": 0.5730298852221247, "grad_norm": 1.5904412269592285, "learning_rate": 8.131428159375817e-05, "loss": 1.4826, "step": 16001 }, { "epoch": 0.573065697351693, "grad_norm": 2.667109489440918, "learning_rate": 8.130288699892608e-05, "loss": 1.6257, "step": 16002 }, { "epoch": 0.5731015094812613, "grad_norm": 1.7234574556350708, "learning_rate": 8.129149265563947e-05, "loss": 1.4741, "step": 16003 }, { "epoch": 0.5731373216108296, "grad_norm": 2.2013614177703857, "learning_rate": 8.128009856405174e-05, "loss": 1.6424, "step": 16004 }, { "epoch": 0.5731731337403979, "grad_norm": 1.7291687726974487, "learning_rate": 8.126870472431613e-05, "loss": 1.4264, "step": 16005 }, { "epoch": 0.5732089458699662, "grad_norm": 1.8733566999435425, "learning_rate": 8.125731113658594e-05, "loss": 1.3412, "step": 16006 }, { "epoch": 0.5732447579995344, "grad_norm": 1.6369208097457886, "learning_rate": 8.124591780101443e-05, "loss": 1.4097, "step": 16007 }, { "epoch": 0.5732805701291027, "grad_norm": 1.460530400276184, "learning_rate": 8.123452471775493e-05, "loss": 1.4468, "step": 16008 }, { "epoch": 0.573316382258671, "grad_norm": 2.263429880142212, "learning_rate": 8.122313188696068e-05, "loss": 1.2833, "step": 16009 }, { "epoch": 0.5733521943882393, "grad_norm": 1.4812617301940918, "learning_rate": 8.121173930878496e-05, "loss": 1.3627, "step": 16010 }, { "epoch": 0.5733880065178076, "grad_norm": 1.905060052871704, "learning_rate": 8.120034698338108e-05, "loss": 1.6278, "step": 16011 }, { "epoch": 0.5734238186473759, "grad_norm": 1.6872472763061523, "learning_rate": 8.118895491090225e-05, "loss": 1.425, "step": 16012 }, { "epoch": 0.5734596307769442, "grad_norm": 1.7046935558319092, "learning_rate": 8.11775630915018e-05, "loss": 1.628, "step": 16013 }, { "epoch": 0.5734954429065124, "grad_norm": 1.5307683944702148, "learning_rate": 8.116617152533292e-05, "loss": 1.6749, "step": 16014 }, { "epoch": 0.5735312550360807, "grad_norm": 1.8192733526229858, "learning_rate": 8.115478021254895e-05, "loss": 1.5034, "step": 16015 }, { "epoch": 0.573567067165649, "grad_norm": 1.5006341934204102, "learning_rate": 8.114338915330307e-05, "loss": 1.0562, "step": 16016 }, { "epoch": 0.5736028792952172, "grad_norm": 1.6144181489944458, "learning_rate": 8.113199834774858e-05, "loss": 1.4468, "step": 16017 }, { "epoch": 0.5736386914247856, "grad_norm": 1.690004825592041, "learning_rate": 8.112060779603873e-05, "loss": 1.3783, "step": 16018 }, { "epoch": 0.5736745035543539, "grad_norm": 1.5593258142471313, "learning_rate": 8.110921749832672e-05, "loss": 1.6404, "step": 16019 }, { "epoch": 0.5737103156839222, "grad_norm": 2.3472657203674316, "learning_rate": 8.109782745476585e-05, "loss": 1.8699, "step": 16020 }, { "epoch": 0.5737461278134904, "grad_norm": 1.9007211923599243, "learning_rate": 8.108643766550929e-05, "loss": 1.3917, "step": 16021 }, { "epoch": 0.5737819399430587, "grad_norm": 1.5271756649017334, "learning_rate": 8.107504813071036e-05, "loss": 1.498, "step": 16022 }, { "epoch": 0.573817752072627, "grad_norm": 1.8045490980148315, "learning_rate": 8.10636588505222e-05, "loss": 1.4419, "step": 16023 }, { "epoch": 0.5738535642021952, "grad_norm": 1.7356674671173096, "learning_rate": 8.105226982509812e-05, "loss": 1.7494, "step": 16024 }, { "epoch": 0.5738893763317636, "grad_norm": 1.2682982683181763, "learning_rate": 8.10408810545913e-05, "loss": 1.3909, "step": 16025 }, { "epoch": 0.5739251884613319, "grad_norm": 1.9603110551834106, "learning_rate": 8.102949253915497e-05, "loss": 1.4457, "step": 16026 }, { "epoch": 0.5739610005909002, "grad_norm": 1.7176648378372192, "learning_rate": 8.101810427894236e-05, "loss": 1.6139, "step": 16027 }, { "epoch": 0.5739968127204684, "grad_norm": 1.5585652589797974, "learning_rate": 8.100671627410664e-05, "loss": 1.4038, "step": 16028 }, { "epoch": 0.5740326248500367, "grad_norm": 1.3921308517456055, "learning_rate": 8.099532852480108e-05, "loss": 1.4396, "step": 16029 }, { "epoch": 0.574068436979605, "grad_norm": 1.6771459579467773, "learning_rate": 8.098394103117885e-05, "loss": 1.4582, "step": 16030 }, { "epoch": 0.5741042491091732, "grad_norm": 1.6584618091583252, "learning_rate": 8.097255379339317e-05, "loss": 1.3684, "step": 16031 }, { "epoch": 0.5741400612387416, "grad_norm": 1.7140179872512817, "learning_rate": 8.096116681159722e-05, "loss": 1.2808, "step": 16032 }, { "epoch": 0.5741758733683099, "grad_norm": 1.8170238733291626, "learning_rate": 8.094978008594423e-05, "loss": 1.5748, "step": 16033 }, { "epoch": 0.5742116854978782, "grad_norm": 1.9986485242843628, "learning_rate": 8.093839361658735e-05, "loss": 1.4221, "step": 16034 }, { "epoch": 0.5742474976274464, "grad_norm": 1.4016669988632202, "learning_rate": 8.092700740367983e-05, "loss": 1.4064, "step": 16035 }, { "epoch": 0.5742833097570147, "grad_norm": 1.619900107383728, "learning_rate": 8.091562144737481e-05, "loss": 1.6672, "step": 16036 }, { "epoch": 0.574319121886583, "grad_norm": 1.6384003162384033, "learning_rate": 8.090423574782549e-05, "loss": 1.5615, "step": 16037 }, { "epoch": 0.5743549340161512, "grad_norm": 1.4524556398391724, "learning_rate": 8.089285030518504e-05, "loss": 1.5234, "step": 16038 }, { "epoch": 0.5743907461457196, "grad_norm": 1.6690996885299683, "learning_rate": 8.088146511960663e-05, "loss": 1.8467, "step": 16039 }, { "epoch": 0.5744265582752879, "grad_norm": 1.6018950939178467, "learning_rate": 8.087008019124347e-05, "loss": 1.1323, "step": 16040 }, { "epoch": 0.5744623704048561, "grad_norm": 1.966084599494934, "learning_rate": 8.085869552024869e-05, "loss": 1.4668, "step": 16041 }, { "epoch": 0.5744981825344244, "grad_norm": 1.7654863595962524, "learning_rate": 8.084731110677548e-05, "loss": 1.4355, "step": 16042 }, { "epoch": 0.5745339946639927, "grad_norm": 1.6190152168273926, "learning_rate": 8.083592695097702e-05, "loss": 1.531, "step": 16043 }, { "epoch": 0.574569806793561, "grad_norm": 1.6661125421524048, "learning_rate": 8.082454305300637e-05, "loss": 1.3279, "step": 16044 }, { "epoch": 0.5746056189231292, "grad_norm": 1.7820805311203003, "learning_rate": 8.081315941301683e-05, "loss": 1.6347, "step": 16045 }, { "epoch": 0.5746414310526975, "grad_norm": 1.4383074045181274, "learning_rate": 8.080177603116142e-05, "loss": 1.7054, "step": 16046 }, { "epoch": 0.5746772431822659, "grad_norm": 1.4362151622772217, "learning_rate": 8.079039290759341e-05, "loss": 1.6129, "step": 16047 }, { "epoch": 0.5747130553118341, "grad_norm": 1.533974051475525, "learning_rate": 8.077901004246584e-05, "loss": 1.7049, "step": 16048 }, { "epoch": 0.5747488674414024, "grad_norm": 1.90034019947052, "learning_rate": 8.076762743593191e-05, "loss": 1.4234, "step": 16049 }, { "epoch": 0.5747846795709707, "grad_norm": 1.5940901041030884, "learning_rate": 8.075624508814474e-05, "loss": 1.4504, "step": 16050 }, { "epoch": 0.574820491700539, "grad_norm": 1.7032641172409058, "learning_rate": 8.074486299925749e-05, "loss": 1.4603, "step": 16051 }, { "epoch": 0.5748563038301072, "grad_norm": 1.4532485008239746, "learning_rate": 8.073348116942329e-05, "loss": 1.7031, "step": 16052 }, { "epoch": 0.5748921159596755, "grad_norm": 2.4565727710723877, "learning_rate": 8.072209959879517e-05, "loss": 1.1419, "step": 16053 }, { "epoch": 0.5749279280892439, "grad_norm": 1.4893320798873901, "learning_rate": 8.071071828752643e-05, "loss": 1.2288, "step": 16054 }, { "epoch": 0.5749637402188121, "grad_norm": 1.7563259601593018, "learning_rate": 8.069933723577e-05, "loss": 1.6753, "step": 16055 }, { "epoch": 0.5749995523483804, "grad_norm": 2.3505520820617676, "learning_rate": 8.068795644367918e-05, "loss": 1.5375, "step": 16056 }, { "epoch": 0.5750353644779487, "grad_norm": 1.5410603284835815, "learning_rate": 8.06765759114069e-05, "loss": 1.742, "step": 16057 }, { "epoch": 0.5750711766075169, "grad_norm": 1.543412446975708, "learning_rate": 8.066519563910645e-05, "loss": 1.3408, "step": 16058 }, { "epoch": 0.5751069887370852, "grad_norm": 2.5051183700561523, "learning_rate": 8.065381562693078e-05, "loss": 1.6021, "step": 16059 }, { "epoch": 0.5751428008666535, "grad_norm": 2.775167942047119, "learning_rate": 8.064243587503313e-05, "loss": 1.4534, "step": 16060 }, { "epoch": 0.5751786129962219, "grad_norm": 1.8102688789367676, "learning_rate": 8.063105638356654e-05, "loss": 1.2111, "step": 16061 }, { "epoch": 0.5752144251257901, "grad_norm": 2.6372623443603516, "learning_rate": 8.061967715268403e-05, "loss": 1.7916, "step": 16062 }, { "epoch": 0.5752502372553584, "grad_norm": 1.2923922538757324, "learning_rate": 8.060829818253884e-05, "loss": 1.3766, "step": 16063 }, { "epoch": 0.5752860493849267, "grad_norm": 1.690511703491211, "learning_rate": 8.059691947328391e-05, "loss": 1.5836, "step": 16064 }, { "epoch": 0.5753218615144949, "grad_norm": 1.6254609823226929, "learning_rate": 8.058554102507248e-05, "loss": 1.6672, "step": 16065 }, { "epoch": 0.5753576736440632, "grad_norm": 1.411546230316162, "learning_rate": 8.057416283805748e-05, "loss": 1.4695, "step": 16066 }, { "epoch": 0.5753934857736315, "grad_norm": 1.4763381481170654, "learning_rate": 8.056278491239213e-05, "loss": 1.1461, "step": 16067 }, { "epoch": 0.5754292979031999, "grad_norm": 2.3086869716644287, "learning_rate": 8.055140724822938e-05, "loss": 1.614, "step": 16068 }, { "epoch": 0.5754651100327681, "grad_norm": 1.1650549173355103, "learning_rate": 8.054002984572241e-05, "loss": 1.2589, "step": 16069 }, { "epoch": 0.5755009221623364, "grad_norm": 1.5132914781570435, "learning_rate": 8.052865270502422e-05, "loss": 1.4143, "step": 16070 }, { "epoch": 0.5755367342919047, "grad_norm": 1.7870749235153198, "learning_rate": 8.051727582628788e-05, "loss": 1.3014, "step": 16071 }, { "epoch": 0.5755725464214729, "grad_norm": 1.9012000560760498, "learning_rate": 8.050589920966647e-05, "loss": 1.5146, "step": 16072 }, { "epoch": 0.5756083585510412, "grad_norm": 1.918483853340149, "learning_rate": 8.049452285531302e-05, "loss": 1.6011, "step": 16073 }, { "epoch": 0.5756441706806095, "grad_norm": 1.824968695640564, "learning_rate": 8.048314676338062e-05, "loss": 1.629, "step": 16074 }, { "epoch": 0.5756799828101778, "grad_norm": 1.566021203994751, "learning_rate": 8.047177093402228e-05, "loss": 1.4753, "step": 16075 }, { "epoch": 0.5757157949397461, "grad_norm": 1.6824095249176025, "learning_rate": 8.046039536739111e-05, "loss": 1.3666, "step": 16076 }, { "epoch": 0.5757516070693144, "grad_norm": 2.086169958114624, "learning_rate": 8.044902006364008e-05, "loss": 1.6941, "step": 16077 }, { "epoch": 0.5757874191988827, "grad_norm": 1.4665168523788452, "learning_rate": 8.043764502292232e-05, "loss": 1.5978, "step": 16078 }, { "epoch": 0.5758232313284509, "grad_norm": 1.646644949913025, "learning_rate": 8.04262702453908e-05, "loss": 1.5653, "step": 16079 }, { "epoch": 0.5758590434580192, "grad_norm": 1.9247905015945435, "learning_rate": 8.041489573119853e-05, "loss": 1.3912, "step": 16080 }, { "epoch": 0.5758948555875875, "grad_norm": 1.658804178237915, "learning_rate": 8.04035214804986e-05, "loss": 1.3743, "step": 16081 }, { "epoch": 0.5759306677171558, "grad_norm": 1.8746556043624878, "learning_rate": 8.0392147493444e-05, "loss": 1.3236, "step": 16082 }, { "epoch": 0.5759664798467241, "grad_norm": 1.518371343612671, "learning_rate": 8.038077377018776e-05, "loss": 1.2282, "step": 16083 }, { "epoch": 0.5760022919762924, "grad_norm": 2.3258750438690186, "learning_rate": 8.03694003108829e-05, "loss": 1.6595, "step": 16084 }, { "epoch": 0.5760381041058606, "grad_norm": 2.2161455154418945, "learning_rate": 8.035802711568245e-05, "loss": 1.6238, "step": 16085 }, { "epoch": 0.5760739162354289, "grad_norm": 1.8260993957519531, "learning_rate": 8.03466541847394e-05, "loss": 1.3711, "step": 16086 }, { "epoch": 0.5761097283649972, "grad_norm": 1.9408621788024902, "learning_rate": 8.033528151820679e-05, "loss": 1.7465, "step": 16087 }, { "epoch": 0.5761455404945655, "grad_norm": 1.375427007675171, "learning_rate": 8.032390911623758e-05, "loss": 1.6076, "step": 16088 }, { "epoch": 0.5761813526241338, "grad_norm": 1.5959179401397705, "learning_rate": 8.031253697898478e-05, "loss": 1.5481, "step": 16089 }, { "epoch": 0.5762171647537021, "grad_norm": 1.694019079208374, "learning_rate": 8.030116510660143e-05, "loss": 1.0399, "step": 16090 }, { "epoch": 0.5762529768832704, "grad_norm": 1.893786907196045, "learning_rate": 8.028979349924048e-05, "loss": 1.616, "step": 16091 }, { "epoch": 0.5762887890128386, "grad_norm": 1.5224804878234863, "learning_rate": 8.027842215705494e-05, "loss": 1.2418, "step": 16092 }, { "epoch": 0.5763246011424069, "grad_norm": 1.8478490114212036, "learning_rate": 8.026705108019777e-05, "loss": 1.4941, "step": 16093 }, { "epoch": 0.5763604132719752, "grad_norm": 1.8832570314407349, "learning_rate": 8.0255680268822e-05, "loss": 1.2081, "step": 16094 }, { "epoch": 0.5763962254015434, "grad_norm": 1.4357125759124756, "learning_rate": 8.024430972308056e-05, "loss": 1.2784, "step": 16095 }, { "epoch": 0.5764320375311118, "grad_norm": 1.4434853792190552, "learning_rate": 8.023293944312647e-05, "loss": 1.605, "step": 16096 }, { "epoch": 0.5764678496606801, "grad_norm": 1.8244801759719849, "learning_rate": 8.022156942911267e-05, "loss": 1.3657, "step": 16097 }, { "epoch": 0.5765036617902484, "grad_norm": 1.4665894508361816, "learning_rate": 8.021019968119215e-05, "loss": 1.6977, "step": 16098 }, { "epoch": 0.5765394739198166, "grad_norm": 1.9285708665847778, "learning_rate": 8.019883019951786e-05, "loss": 1.2284, "step": 16099 }, { "epoch": 0.5765752860493849, "grad_norm": 2.212251663208008, "learning_rate": 8.018746098424276e-05, "loss": 1.763, "step": 16100 }, { "epoch": 0.5766110981789532, "grad_norm": 1.7550066709518433, "learning_rate": 8.017609203551983e-05, "loss": 1.4755, "step": 16101 }, { "epoch": 0.5766469103085214, "grad_norm": 1.8005722761154175, "learning_rate": 8.0164723353502e-05, "loss": 1.434, "step": 16102 }, { "epoch": 0.5766827224380898, "grad_norm": 1.9695098400115967, "learning_rate": 8.015335493834224e-05, "loss": 1.6995, "step": 16103 }, { "epoch": 0.5767185345676581, "grad_norm": 1.847440481185913, "learning_rate": 8.014198679019348e-05, "loss": 1.7125, "step": 16104 }, { "epoch": 0.5767543466972264, "grad_norm": 1.2874605655670166, "learning_rate": 8.01306189092087e-05, "loss": 1.4815, "step": 16105 }, { "epoch": 0.5767901588267946, "grad_norm": 1.833154559135437, "learning_rate": 8.011925129554078e-05, "loss": 1.2951, "step": 16106 }, { "epoch": 0.5768259709563629, "grad_norm": 1.9240434169769287, "learning_rate": 8.01078839493427e-05, "loss": 1.1895, "step": 16107 }, { "epoch": 0.5768617830859312, "grad_norm": 1.921027421951294, "learning_rate": 8.009651687076739e-05, "loss": 1.4075, "step": 16108 }, { "epoch": 0.5768975952154994, "grad_norm": 1.5504812002182007, "learning_rate": 8.008515005996775e-05, "loss": 1.2061, "step": 16109 }, { "epoch": 0.5769334073450678, "grad_norm": 1.6226364374160767, "learning_rate": 8.007378351709676e-05, "loss": 1.6403, "step": 16110 }, { "epoch": 0.5769692194746361, "grad_norm": 1.543958067893982, "learning_rate": 8.006241724230728e-05, "loss": 1.6479, "step": 16111 }, { "epoch": 0.5770050316042044, "grad_norm": 1.6312440633773804, "learning_rate": 8.005105123575228e-05, "loss": 1.4381, "step": 16112 }, { "epoch": 0.5770408437337726, "grad_norm": 1.547906517982483, "learning_rate": 8.003968549758462e-05, "loss": 1.6837, "step": 16113 }, { "epoch": 0.5770766558633409, "grad_norm": 2.2383906841278076, "learning_rate": 8.002832002795729e-05, "loss": 0.9843, "step": 16114 }, { "epoch": 0.5771124679929092, "grad_norm": 1.4814233779907227, "learning_rate": 8.001695482702314e-05, "loss": 1.3434, "step": 16115 }, { "epoch": 0.5771482801224774, "grad_norm": 1.837546706199646, "learning_rate": 8.000558989493507e-05, "loss": 1.544, "step": 16116 }, { "epoch": 0.5771840922520458, "grad_norm": 1.2523295879364014, "learning_rate": 7.999422523184601e-05, "loss": 1.287, "step": 16117 }, { "epoch": 0.5772199043816141, "grad_norm": 1.5280488729476929, "learning_rate": 7.998286083790883e-05, "loss": 1.323, "step": 16118 }, { "epoch": 0.5772557165111823, "grad_norm": 1.6794685125350952, "learning_rate": 7.997149671327646e-05, "loss": 1.4495, "step": 16119 }, { "epoch": 0.5772915286407506, "grad_norm": 2.4332382678985596, "learning_rate": 7.996013285810173e-05, "loss": 1.5304, "step": 16120 }, { "epoch": 0.5773273407703189, "grad_norm": 1.4107089042663574, "learning_rate": 7.99487692725376e-05, "loss": 1.2941, "step": 16121 }, { "epoch": 0.5773631528998872, "grad_norm": 1.6227834224700928, "learning_rate": 7.993740595673689e-05, "loss": 1.3497, "step": 16122 }, { "epoch": 0.5773989650294554, "grad_norm": 1.5155694484710693, "learning_rate": 7.992604291085253e-05, "loss": 1.6335, "step": 16123 }, { "epoch": 0.5774347771590238, "grad_norm": 2.1160717010498047, "learning_rate": 7.991468013503735e-05, "loss": 1.6124, "step": 16124 }, { "epoch": 0.5774705892885921, "grad_norm": 1.6966854333877563, "learning_rate": 7.990331762944426e-05, "loss": 1.4299, "step": 16125 }, { "epoch": 0.5775064014181603, "grad_norm": 1.8344290256500244, "learning_rate": 7.989195539422609e-05, "loss": 1.5318, "step": 16126 }, { "epoch": 0.5775422135477286, "grad_norm": 2.1543333530426025, "learning_rate": 7.988059342953571e-05, "loss": 1.7969, "step": 16127 }, { "epoch": 0.5775780256772969, "grad_norm": 1.9290580749511719, "learning_rate": 7.986923173552602e-05, "loss": 1.7385, "step": 16128 }, { "epoch": 0.5776138378068651, "grad_norm": 1.6262422800064087, "learning_rate": 7.985787031234983e-05, "loss": 1.4024, "step": 16129 }, { "epoch": 0.5776496499364334, "grad_norm": 1.9909801483154297, "learning_rate": 7.984650916016003e-05, "loss": 1.3964, "step": 16130 }, { "epoch": 0.5776854620660018, "grad_norm": 1.3821978569030762, "learning_rate": 7.983514827910943e-05, "loss": 1.3624, "step": 16131 }, { "epoch": 0.5777212741955701, "grad_norm": 1.4430720806121826, "learning_rate": 7.982378766935092e-05, "loss": 1.6197, "step": 16132 }, { "epoch": 0.5777570863251383, "grad_norm": 1.3576154708862305, "learning_rate": 7.981242733103734e-05, "loss": 1.4386, "step": 16133 }, { "epoch": 0.5777928984547066, "grad_norm": 2.1250572204589844, "learning_rate": 7.980106726432148e-05, "loss": 1.2357, "step": 16134 }, { "epoch": 0.5778287105842749, "grad_norm": 1.6728894710540771, "learning_rate": 7.978970746935621e-05, "loss": 1.5269, "step": 16135 }, { "epoch": 0.5778645227138431, "grad_norm": 2.4180312156677246, "learning_rate": 7.977834794629436e-05, "loss": 1.5775, "step": 16136 }, { "epoch": 0.5779003348434114, "grad_norm": 1.6585233211517334, "learning_rate": 7.976698869528876e-05, "loss": 1.5318, "step": 16137 }, { "epoch": 0.5779361469729798, "grad_norm": 1.3715888261795044, "learning_rate": 7.975562971649219e-05, "loss": 1.5977, "step": 16138 }, { "epoch": 0.5779719591025481, "grad_norm": 1.8172003030776978, "learning_rate": 7.974427101005756e-05, "loss": 1.3096, "step": 16139 }, { "epoch": 0.5780077712321163, "grad_norm": 1.5655006170272827, "learning_rate": 7.973291257613761e-05, "loss": 1.4269, "step": 16140 }, { "epoch": 0.5780435833616846, "grad_norm": 1.313693881034851, "learning_rate": 7.97215544148852e-05, "loss": 1.3408, "step": 16141 }, { "epoch": 0.5780793954912529, "grad_norm": 1.644073486328125, "learning_rate": 7.971019652645313e-05, "loss": 1.4267, "step": 16142 }, { "epoch": 0.5781152076208211, "grad_norm": 2.4095518589019775, "learning_rate": 7.969883891099412e-05, "loss": 1.4167, "step": 16143 }, { "epoch": 0.5781510197503894, "grad_norm": 2.725630044937134, "learning_rate": 7.968748156866113e-05, "loss": 1.35, "step": 16144 }, { "epoch": 0.5781868318799578, "grad_norm": 2.1629457473754883, "learning_rate": 7.967612449960679e-05, "loss": 1.6466, "step": 16145 }, { "epoch": 0.578222644009526, "grad_norm": 1.4881287813186646, "learning_rate": 7.966476770398404e-05, "loss": 1.5601, "step": 16146 }, { "epoch": 0.5782584561390943, "grad_norm": 1.5154433250427246, "learning_rate": 7.965341118194559e-05, "loss": 1.3574, "step": 16147 }, { "epoch": 0.5782942682686626, "grad_norm": 1.5236823558807373, "learning_rate": 7.964205493364426e-05, "loss": 1.5902, "step": 16148 }, { "epoch": 0.5783300803982309, "grad_norm": 1.974445104598999, "learning_rate": 7.963069895923285e-05, "loss": 1.5286, "step": 16149 }, { "epoch": 0.5783658925277991, "grad_norm": 2.1835594177246094, "learning_rate": 7.961934325886404e-05, "loss": 1.2116, "step": 16150 }, { "epoch": 0.5784017046573674, "grad_norm": 1.4302401542663574, "learning_rate": 7.960798783269074e-05, "loss": 1.565, "step": 16151 }, { "epoch": 0.5784375167869358, "grad_norm": 2.1140241622924805, "learning_rate": 7.95966326808656e-05, "loss": 1.4891, "step": 16152 }, { "epoch": 0.578473328916504, "grad_norm": 1.4253383874893188, "learning_rate": 7.958527780354151e-05, "loss": 1.386, "step": 16153 }, { "epoch": 0.5785091410460723, "grad_norm": 1.2918790578842163, "learning_rate": 7.957392320087112e-05, "loss": 1.9235, "step": 16154 }, { "epoch": 0.5785449531756406, "grad_norm": 1.124372959136963, "learning_rate": 7.956256887300729e-05, "loss": 1.4101, "step": 16155 }, { "epoch": 0.5785807653052089, "grad_norm": 1.958508014678955, "learning_rate": 7.955121482010268e-05, "loss": 1.273, "step": 16156 }, { "epoch": 0.5786165774347771, "grad_norm": 2.251087188720703, "learning_rate": 7.953986104231018e-05, "loss": 1.4734, "step": 16157 }, { "epoch": 0.5786523895643454, "grad_norm": 2.016096353530884, "learning_rate": 7.95285075397824e-05, "loss": 1.6336, "step": 16158 }, { "epoch": 0.5786882016939138, "grad_norm": 1.6927529573440552, "learning_rate": 7.951715431267213e-05, "loss": 1.6828, "step": 16159 }, { "epoch": 0.578724013823482, "grad_norm": 1.4586488008499146, "learning_rate": 7.950580136113219e-05, "loss": 1.4452, "step": 16160 }, { "epoch": 0.5787598259530503, "grad_norm": 1.6537890434265137, "learning_rate": 7.949444868531517e-05, "loss": 1.5215, "step": 16161 }, { "epoch": 0.5787956380826186, "grad_norm": 1.5151437520980835, "learning_rate": 7.948309628537399e-05, "loss": 1.3912, "step": 16162 }, { "epoch": 0.5788314502121868, "grad_norm": 1.7033724784851074, "learning_rate": 7.94717441614612e-05, "loss": 1.5666, "step": 16163 }, { "epoch": 0.5788672623417551, "grad_norm": 1.2881311178207397, "learning_rate": 7.946039231372967e-05, "loss": 1.5314, "step": 16164 }, { "epoch": 0.5789030744713234, "grad_norm": 1.7593480348587036, "learning_rate": 7.944904074233201e-05, "loss": 1.5596, "step": 16165 }, { "epoch": 0.5789388866008918, "grad_norm": 1.4670255184173584, "learning_rate": 7.943768944742107e-05, "loss": 1.5353, "step": 16166 }, { "epoch": 0.57897469873046, "grad_norm": 1.4414554834365845, "learning_rate": 7.942633842914946e-05, "loss": 1.538, "step": 16167 }, { "epoch": 0.5790105108600283, "grad_norm": 1.5942708253860474, "learning_rate": 7.941498768766991e-05, "loss": 1.3996, "step": 16168 }, { "epoch": 0.5790463229895966, "grad_norm": 2.574983596801758, "learning_rate": 7.940363722313519e-05, "loss": 1.3455, "step": 16169 }, { "epoch": 0.5790821351191648, "grad_norm": 1.7376738786697388, "learning_rate": 7.939228703569792e-05, "loss": 1.5424, "step": 16170 }, { "epoch": 0.5791179472487331, "grad_norm": 1.8088690042495728, "learning_rate": 7.938093712551087e-05, "loss": 1.7215, "step": 16171 }, { "epoch": 0.5791537593783014, "grad_norm": 1.8485448360443115, "learning_rate": 7.936958749272669e-05, "loss": 1.6542, "step": 16172 }, { "epoch": 0.5791895715078698, "grad_norm": 1.5748894214630127, "learning_rate": 7.935823813749815e-05, "loss": 1.5086, "step": 16173 }, { "epoch": 0.579225383637438, "grad_norm": 2.4885716438293457, "learning_rate": 7.934688905997781e-05, "loss": 1.6712, "step": 16174 }, { "epoch": 0.5792611957670063, "grad_norm": 1.5027422904968262, "learning_rate": 7.933554026031852e-05, "loss": 1.2673, "step": 16175 }, { "epoch": 0.5792970078965746, "grad_norm": 1.5155824422836304, "learning_rate": 7.932419173867286e-05, "loss": 1.5036, "step": 16176 }, { "epoch": 0.5793328200261428, "grad_norm": 1.7846710681915283, "learning_rate": 7.93128434951935e-05, "loss": 1.4373, "step": 16177 }, { "epoch": 0.5793686321557111, "grad_norm": 1.8113620281219482, "learning_rate": 7.930149553003318e-05, "loss": 1.6359, "step": 16178 }, { "epoch": 0.5794044442852794, "grad_norm": 1.718705654144287, "learning_rate": 7.92901478433445e-05, "loss": 1.4123, "step": 16179 }, { "epoch": 0.5794402564148478, "grad_norm": 1.435090184211731, "learning_rate": 7.92788004352802e-05, "loss": 1.567, "step": 16180 }, { "epoch": 0.579476068544416, "grad_norm": 1.6663739681243896, "learning_rate": 7.926745330599289e-05, "loss": 1.545, "step": 16181 }, { "epoch": 0.5795118806739843, "grad_norm": 1.994544506072998, "learning_rate": 7.925610645563527e-05, "loss": 1.5623, "step": 16182 }, { "epoch": 0.5795476928035526, "grad_norm": 2.0755226612091064, "learning_rate": 7.924475988435996e-05, "loss": 1.7355, "step": 16183 }, { "epoch": 0.5795835049331208, "grad_norm": 1.354204773902893, "learning_rate": 7.923341359231965e-05, "loss": 1.4404, "step": 16184 }, { "epoch": 0.5796193170626891, "grad_norm": 1.4060965776443481, "learning_rate": 7.922206757966698e-05, "loss": 1.1692, "step": 16185 }, { "epoch": 0.5796551291922574, "grad_norm": 1.943017601966858, "learning_rate": 7.921072184655457e-05, "loss": 1.2975, "step": 16186 }, { "epoch": 0.5796909413218257, "grad_norm": 1.4191889762878418, "learning_rate": 7.91993763931351e-05, "loss": 1.4388, "step": 16187 }, { "epoch": 0.579726753451394, "grad_norm": 1.3876757621765137, "learning_rate": 7.918803121956117e-05, "loss": 1.4685, "step": 16188 }, { "epoch": 0.5797625655809623, "grad_norm": 1.7913190126419067, "learning_rate": 7.917668632598545e-05, "loss": 1.269, "step": 16189 }, { "epoch": 0.5797983777105306, "grad_norm": 2.235036611557007, "learning_rate": 7.916534171256054e-05, "loss": 1.1393, "step": 16190 }, { "epoch": 0.5798341898400988, "grad_norm": 1.541192889213562, "learning_rate": 7.91539973794391e-05, "loss": 1.7477, "step": 16191 }, { "epoch": 0.5798700019696671, "grad_norm": 1.6059012413024902, "learning_rate": 7.914265332677371e-05, "loss": 1.4964, "step": 16192 }, { "epoch": 0.5799058140992354, "grad_norm": 1.5511407852172852, "learning_rate": 7.913130955471704e-05, "loss": 1.7551, "step": 16193 }, { "epoch": 0.5799416262288037, "grad_norm": 1.5841376781463623, "learning_rate": 7.911996606342168e-05, "loss": 1.3235, "step": 16194 }, { "epoch": 0.579977438358372, "grad_norm": 1.651566982269287, "learning_rate": 7.910862285304022e-05, "loss": 1.4021, "step": 16195 }, { "epoch": 0.5800132504879403, "grad_norm": 1.7354012727737427, "learning_rate": 7.909727992372533e-05, "loss": 1.5678, "step": 16196 }, { "epoch": 0.5800490626175085, "grad_norm": 1.7143256664276123, "learning_rate": 7.908593727562954e-05, "loss": 1.3513, "step": 16197 }, { "epoch": 0.5800848747470768, "grad_norm": 1.5278571844100952, "learning_rate": 7.907459490890551e-05, "loss": 1.2315, "step": 16198 }, { "epoch": 0.5801206868766451, "grad_norm": 1.3946882486343384, "learning_rate": 7.906325282370579e-05, "loss": 1.7144, "step": 16199 }, { "epoch": 0.5801564990062134, "grad_norm": 1.6457384824752808, "learning_rate": 7.905191102018302e-05, "loss": 1.3999, "step": 16200 }, { "epoch": 0.5801923111357817, "grad_norm": 1.8805018663406372, "learning_rate": 7.904056949848975e-05, "loss": 1.5579, "step": 16201 }, { "epoch": 0.58022812326535, "grad_norm": 2.192815065383911, "learning_rate": 7.90292282587786e-05, "loss": 1.4462, "step": 16202 }, { "epoch": 0.5802639353949183, "grad_norm": 1.707252860069275, "learning_rate": 7.901788730120214e-05, "loss": 1.1617, "step": 16203 }, { "epoch": 0.5802997475244865, "grad_norm": 2.2788679599761963, "learning_rate": 7.90065466259129e-05, "loss": 1.3855, "step": 16204 }, { "epoch": 0.5803355596540548, "grad_norm": 1.7162657976150513, "learning_rate": 7.899520623306353e-05, "loss": 1.2427, "step": 16205 }, { "epoch": 0.5803713717836231, "grad_norm": 2.0036752223968506, "learning_rate": 7.898386612280654e-05, "loss": 1.4296, "step": 16206 }, { "epoch": 0.5804071839131913, "grad_norm": 1.5575006008148193, "learning_rate": 7.897252629529455e-05, "loss": 1.6481, "step": 16207 }, { "epoch": 0.5804429960427597, "grad_norm": 1.8689552545547485, "learning_rate": 7.896118675068007e-05, "loss": 1.5716, "step": 16208 }, { "epoch": 0.580478808172328, "grad_norm": 1.734002947807312, "learning_rate": 7.894984748911572e-05, "loss": 1.4126, "step": 16209 }, { "epoch": 0.5805146203018963, "grad_norm": 1.9795552492141724, "learning_rate": 7.893850851075398e-05, "loss": 1.3466, "step": 16210 }, { "epoch": 0.5805504324314645, "grad_norm": 1.9898791313171387, "learning_rate": 7.892716981574747e-05, "loss": 1.5883, "step": 16211 }, { "epoch": 0.5805862445610328, "grad_norm": 1.4967081546783447, "learning_rate": 7.89158314042487e-05, "loss": 1.5994, "step": 16212 }, { "epoch": 0.5806220566906011, "grad_norm": 1.672903299331665, "learning_rate": 7.890449327641021e-05, "loss": 1.7382, "step": 16213 }, { "epoch": 0.5806578688201693, "grad_norm": 1.7039663791656494, "learning_rate": 7.889315543238457e-05, "loss": 1.6315, "step": 16214 }, { "epoch": 0.5806936809497377, "grad_norm": 1.6308118104934692, "learning_rate": 7.888181787232427e-05, "loss": 1.5503, "step": 16215 }, { "epoch": 0.580729493079306, "grad_norm": 1.8817521333694458, "learning_rate": 7.88704805963819e-05, "loss": 1.5972, "step": 16216 }, { "epoch": 0.5807653052088743, "grad_norm": 1.8613077402114868, "learning_rate": 7.885914360470992e-05, "loss": 1.6216, "step": 16217 }, { "epoch": 0.5808011173384425, "grad_norm": 1.5126614570617676, "learning_rate": 7.884780689746094e-05, "loss": 1.4662, "step": 16218 }, { "epoch": 0.5808369294680108, "grad_norm": 1.5945414304733276, "learning_rate": 7.88364704747874e-05, "loss": 1.5989, "step": 16219 }, { "epoch": 0.5808727415975791, "grad_norm": 1.5594874620437622, "learning_rate": 7.882513433684188e-05, "loss": 1.5519, "step": 16220 }, { "epoch": 0.5809085537271473, "grad_norm": 1.834822177886963, "learning_rate": 7.881379848377685e-05, "loss": 1.6284, "step": 16221 }, { "epoch": 0.5809443658567157, "grad_norm": 2.2757322788238525, "learning_rate": 7.880246291574482e-05, "loss": 1.3027, "step": 16222 }, { "epoch": 0.580980177986284, "grad_norm": 1.6082566976547241, "learning_rate": 7.879112763289833e-05, "loss": 1.4609, "step": 16223 }, { "epoch": 0.5810159901158523, "grad_norm": 1.623914361000061, "learning_rate": 7.877979263538983e-05, "loss": 1.5868, "step": 16224 }, { "epoch": 0.5810518022454205, "grad_norm": 1.4521030187606812, "learning_rate": 7.876845792337189e-05, "loss": 1.3755, "step": 16225 }, { "epoch": 0.5810876143749888, "grad_norm": 1.9787819385528564, "learning_rate": 7.875712349699692e-05, "loss": 1.0918, "step": 16226 }, { "epoch": 0.5811234265045571, "grad_norm": 2.0238037109375, "learning_rate": 7.874578935641748e-05, "loss": 1.3673, "step": 16227 }, { "epoch": 0.5811592386341253, "grad_norm": 1.6591154336929321, "learning_rate": 7.873445550178601e-05, "loss": 1.4138, "step": 16228 }, { "epoch": 0.5811950507636937, "grad_norm": 1.5953693389892578, "learning_rate": 7.872312193325502e-05, "loss": 1.095, "step": 16229 }, { "epoch": 0.581230862893262, "grad_norm": 1.9868221282958984, "learning_rate": 7.871178865097699e-05, "loss": 1.5134, "step": 16230 }, { "epoch": 0.5812666750228302, "grad_norm": 1.6181082725524902, "learning_rate": 7.870045565510436e-05, "loss": 1.344, "step": 16231 }, { "epoch": 0.5813024871523985, "grad_norm": 1.8344509601593018, "learning_rate": 7.868912294578965e-05, "loss": 1.2748, "step": 16232 }, { "epoch": 0.5813382992819668, "grad_norm": 1.7959150075912476, "learning_rate": 7.867779052318528e-05, "loss": 1.7419, "step": 16233 }, { "epoch": 0.581374111411535, "grad_norm": 1.6314499378204346, "learning_rate": 7.866645838744375e-05, "loss": 1.7027, "step": 16234 }, { "epoch": 0.5814099235411033, "grad_norm": 2.4061760902404785, "learning_rate": 7.865512653871749e-05, "loss": 1.3808, "step": 16235 }, { "epoch": 0.5814457356706717, "grad_norm": 1.7618334293365479, "learning_rate": 7.864379497715898e-05, "loss": 1.5516, "step": 16236 }, { "epoch": 0.58148154780024, "grad_norm": 1.2001464366912842, "learning_rate": 7.863246370292065e-05, "loss": 1.2329, "step": 16237 }, { "epoch": 0.5815173599298082, "grad_norm": 1.4679129123687744, "learning_rate": 7.862113271615499e-05, "loss": 1.3114, "step": 16238 }, { "epoch": 0.5815531720593765, "grad_norm": 1.4955120086669922, "learning_rate": 7.860980201701441e-05, "loss": 1.4209, "step": 16239 }, { "epoch": 0.5815889841889448, "grad_norm": 1.7343052625656128, "learning_rate": 7.859847160565131e-05, "loss": 1.4165, "step": 16240 }, { "epoch": 0.581624796318513, "grad_norm": 2.181267499923706, "learning_rate": 7.858714148221822e-05, "loss": 1.6077, "step": 16241 }, { "epoch": 0.5816606084480813, "grad_norm": 1.4039392471313477, "learning_rate": 7.857581164686744e-05, "loss": 1.7029, "step": 16242 }, { "epoch": 0.5816964205776497, "grad_norm": 2.262587070465088, "learning_rate": 7.856448209975156e-05, "loss": 1.7322, "step": 16243 }, { "epoch": 0.581732232707218, "grad_norm": 1.8444851636886597, "learning_rate": 7.855315284102288e-05, "loss": 1.194, "step": 16244 }, { "epoch": 0.5817680448367862, "grad_norm": 1.571876049041748, "learning_rate": 7.854182387083389e-05, "loss": 1.4038, "step": 16245 }, { "epoch": 0.5818038569663545, "grad_norm": 2.275881767272949, "learning_rate": 7.8530495189337e-05, "loss": 1.6678, "step": 16246 }, { "epoch": 0.5818396690959228, "grad_norm": 1.6172714233398438, "learning_rate": 7.851916679668454e-05, "loss": 1.4166, "step": 16247 }, { "epoch": 0.581875481225491, "grad_norm": 2.5062615871429443, "learning_rate": 7.850783869302905e-05, "loss": 1.5164, "step": 16248 }, { "epoch": 0.5819112933550593, "grad_norm": 2.113309144973755, "learning_rate": 7.849651087852278e-05, "loss": 1.3307, "step": 16249 }, { "epoch": 0.5819471054846277, "grad_norm": 1.644601821899414, "learning_rate": 7.848518335331832e-05, "loss": 1.6786, "step": 16250 }, { "epoch": 0.581982917614196, "grad_norm": 1.3407495021820068, "learning_rate": 7.847385611756788e-05, "loss": 1.343, "step": 16251 }, { "epoch": 0.5820187297437642, "grad_norm": 1.6835163831710815, "learning_rate": 7.8462529171424e-05, "loss": 1.1167, "step": 16252 }, { "epoch": 0.5820545418733325, "grad_norm": 1.790061593055725, "learning_rate": 7.845120251503896e-05, "loss": 1.8415, "step": 16253 }, { "epoch": 0.5820903540029008, "grad_norm": 1.642216444015503, "learning_rate": 7.843987614856525e-05, "loss": 1.3947, "step": 16254 }, { "epoch": 0.582126166132469, "grad_norm": 1.661774754524231, "learning_rate": 7.842855007215517e-05, "loss": 1.1908, "step": 16255 }, { "epoch": 0.5821619782620373, "grad_norm": 1.867827296257019, "learning_rate": 7.841722428596109e-05, "loss": 1.5475, "step": 16256 }, { "epoch": 0.5821977903916057, "grad_norm": 1.7840546369552612, "learning_rate": 7.840589879013548e-05, "loss": 1.6112, "step": 16257 }, { "epoch": 0.582233602521174, "grad_norm": 1.458702802658081, "learning_rate": 7.839457358483057e-05, "loss": 1.3471, "step": 16258 }, { "epoch": 0.5822694146507422, "grad_norm": 1.494171142578125, "learning_rate": 7.838324867019888e-05, "loss": 1.2515, "step": 16259 }, { "epoch": 0.5823052267803105, "grad_norm": 1.5532360076904297, "learning_rate": 7.837192404639264e-05, "loss": 1.4255, "step": 16260 }, { "epoch": 0.5823410389098788, "grad_norm": 1.6035205125808716, "learning_rate": 7.83605997135643e-05, "loss": 1.502, "step": 16261 }, { "epoch": 0.582376851039447, "grad_norm": 1.7551525831222534, "learning_rate": 7.834927567186614e-05, "loss": 1.3939, "step": 16262 }, { "epoch": 0.5824126631690153, "grad_norm": 1.5942054986953735, "learning_rate": 7.833795192145062e-05, "loss": 1.4372, "step": 16263 }, { "epoch": 0.5824484752985837, "grad_norm": 1.804587960243225, "learning_rate": 7.832662846246997e-05, "loss": 1.4715, "step": 16264 }, { "epoch": 0.582484287428152, "grad_norm": 1.6673866510391235, "learning_rate": 7.831530529507656e-05, "loss": 1.4346, "step": 16265 }, { "epoch": 0.5825200995577202, "grad_norm": 1.700072169303894, "learning_rate": 7.830398241942278e-05, "loss": 1.438, "step": 16266 }, { "epoch": 0.5825559116872885, "grad_norm": 1.6875628232955933, "learning_rate": 7.829265983566088e-05, "loss": 1.4232, "step": 16267 }, { "epoch": 0.5825917238168568, "grad_norm": 1.3997061252593994, "learning_rate": 7.82813375439433e-05, "loss": 1.5627, "step": 16268 }, { "epoch": 0.582627535946425, "grad_norm": 2.5741705894470215, "learning_rate": 7.827001554442224e-05, "loss": 1.5578, "step": 16269 }, { "epoch": 0.5826633480759933, "grad_norm": 1.8426802158355713, "learning_rate": 7.825869383725017e-05, "loss": 1.4258, "step": 16270 }, { "epoch": 0.5826991602055617, "grad_norm": 1.2935112714767456, "learning_rate": 7.824737242257925e-05, "loss": 1.4715, "step": 16271 }, { "epoch": 0.5827349723351299, "grad_norm": 1.767466425895691, "learning_rate": 7.823605130056196e-05, "loss": 1.5631, "step": 16272 }, { "epoch": 0.5827707844646982, "grad_norm": 2.372025489807129, "learning_rate": 7.822473047135048e-05, "loss": 1.4195, "step": 16273 }, { "epoch": 0.5828065965942665, "grad_norm": 1.9583728313446045, "learning_rate": 7.821340993509716e-05, "loss": 1.5143, "step": 16274 }, { "epoch": 0.5828424087238347, "grad_norm": 1.539113163948059, "learning_rate": 7.820208969195432e-05, "loss": 1.4506, "step": 16275 }, { "epoch": 0.582878220853403, "grad_norm": 1.511946201324463, "learning_rate": 7.819076974207425e-05, "loss": 1.7248, "step": 16276 }, { "epoch": 0.5829140329829713, "grad_norm": 1.7202016115188599, "learning_rate": 7.817945008560923e-05, "loss": 1.5891, "step": 16277 }, { "epoch": 0.5829498451125397, "grad_norm": 1.5539473295211792, "learning_rate": 7.816813072271155e-05, "loss": 1.2524, "step": 16278 }, { "epoch": 0.5829856572421079, "grad_norm": 1.6296128034591675, "learning_rate": 7.815681165353353e-05, "loss": 1.3194, "step": 16279 }, { "epoch": 0.5830214693716762, "grad_norm": 1.5075005292892456, "learning_rate": 7.814549287822743e-05, "loss": 1.8098, "step": 16280 }, { "epoch": 0.5830572815012445, "grad_norm": 2.4536232948303223, "learning_rate": 7.813417439694553e-05, "loss": 1.7013, "step": 16281 }, { "epoch": 0.5830930936308127, "grad_norm": 1.7020354270935059, "learning_rate": 7.812285620984012e-05, "loss": 1.2331, "step": 16282 }, { "epoch": 0.583128905760381, "grad_norm": 1.890274167060852, "learning_rate": 7.811153831706344e-05, "loss": 1.5311, "step": 16283 }, { "epoch": 0.5831647178899493, "grad_norm": 1.4711588621139526, "learning_rate": 7.81002207187678e-05, "loss": 1.5288, "step": 16284 }, { "epoch": 0.5832005300195177, "grad_norm": 1.563541293144226, "learning_rate": 7.808890341510542e-05, "loss": 1.3513, "step": 16285 }, { "epoch": 0.5832363421490859, "grad_norm": 2.1257882118225098, "learning_rate": 7.80775864062286e-05, "loss": 1.2747, "step": 16286 }, { "epoch": 0.5832721542786542, "grad_norm": 1.6666767597198486, "learning_rate": 7.806626969228955e-05, "loss": 1.256, "step": 16287 }, { "epoch": 0.5833079664082225, "grad_norm": 1.7806174755096436, "learning_rate": 7.805495327344058e-05, "loss": 1.6864, "step": 16288 }, { "epoch": 0.5833437785377907, "grad_norm": 1.5467238426208496, "learning_rate": 7.804363714983387e-05, "loss": 1.2907, "step": 16289 }, { "epoch": 0.583379590667359, "grad_norm": 1.4566665887832642, "learning_rate": 7.803232132162174e-05, "loss": 1.5714, "step": 16290 }, { "epoch": 0.5834154027969273, "grad_norm": 1.4355965852737427, "learning_rate": 7.802100578895638e-05, "loss": 1.507, "step": 16291 }, { "epoch": 0.5834512149264957, "grad_norm": 1.7385189533233643, "learning_rate": 7.800969055199003e-05, "loss": 1.5291, "step": 16292 }, { "epoch": 0.5834870270560639, "grad_norm": 1.3698575496673584, "learning_rate": 7.799837561087493e-05, "loss": 1.3751, "step": 16293 }, { "epoch": 0.5835228391856322, "grad_norm": 1.8529399633407593, "learning_rate": 7.798706096576329e-05, "loss": 1.5273, "step": 16294 }, { "epoch": 0.5835586513152005, "grad_norm": 1.3171405792236328, "learning_rate": 7.797574661680737e-05, "loss": 1.0538, "step": 16295 }, { "epoch": 0.5835944634447687, "grad_norm": 1.4912500381469727, "learning_rate": 7.796443256415935e-05, "loss": 1.6792, "step": 16296 }, { "epoch": 0.583630275574337, "grad_norm": 1.7634475231170654, "learning_rate": 7.79531188079715e-05, "loss": 1.4089, "step": 16297 }, { "epoch": 0.5836660877039053, "grad_norm": 2.007218599319458, "learning_rate": 7.794180534839597e-05, "loss": 1.2288, "step": 16298 }, { "epoch": 0.5837018998334736, "grad_norm": 2.622135877609253, "learning_rate": 7.793049218558501e-05, "loss": 1.4607, "step": 16299 }, { "epoch": 0.5837377119630419, "grad_norm": 1.8438994884490967, "learning_rate": 7.791917931969082e-05, "loss": 1.6507, "step": 16300 }, { "epoch": 0.5837735240926102, "grad_norm": 1.5510149002075195, "learning_rate": 7.790786675086555e-05, "loss": 1.5637, "step": 16301 }, { "epoch": 0.5838093362221785, "grad_norm": 1.6385968923568726, "learning_rate": 7.789655447926147e-05, "loss": 1.5741, "step": 16302 }, { "epoch": 0.5838451483517467, "grad_norm": 1.8733488321304321, "learning_rate": 7.788524250503072e-05, "loss": 1.6931, "step": 16303 }, { "epoch": 0.583880960481315, "grad_norm": 1.5182271003723145, "learning_rate": 7.787393082832553e-05, "loss": 1.5635, "step": 16304 }, { "epoch": 0.5839167726108833, "grad_norm": 1.9357662200927734, "learning_rate": 7.786261944929803e-05, "loss": 1.447, "step": 16305 }, { "epoch": 0.5839525847404516, "grad_norm": 3.7043070793151855, "learning_rate": 7.785130836810045e-05, "loss": 1.4161, "step": 16306 }, { "epoch": 0.5839883968700199, "grad_norm": 1.9308794736862183, "learning_rate": 7.783999758488492e-05, "loss": 1.3324, "step": 16307 }, { "epoch": 0.5840242089995882, "grad_norm": 1.5649685859680176, "learning_rate": 7.782868709980368e-05, "loss": 1.5649, "step": 16308 }, { "epoch": 0.5840600211291564, "grad_norm": 1.5657800436019897, "learning_rate": 7.781737691300884e-05, "loss": 1.4936, "step": 16309 }, { "epoch": 0.5840958332587247, "grad_norm": 1.9875231981277466, "learning_rate": 7.780606702465256e-05, "loss": 1.534, "step": 16310 }, { "epoch": 0.584131645388293, "grad_norm": 1.580452799797058, "learning_rate": 7.779475743488705e-05, "loss": 1.5182, "step": 16311 }, { "epoch": 0.5841674575178613, "grad_norm": 1.5643384456634521, "learning_rate": 7.778344814386441e-05, "loss": 1.3858, "step": 16312 }, { "epoch": 0.5842032696474296, "grad_norm": 1.5773082971572876, "learning_rate": 7.777213915173685e-05, "loss": 1.5475, "step": 16313 }, { "epoch": 0.5842390817769979, "grad_norm": 1.6503983736038208, "learning_rate": 7.776083045865645e-05, "loss": 1.3458, "step": 16314 }, { "epoch": 0.5842748939065662, "grad_norm": 2.040325403213501, "learning_rate": 7.774952206477542e-05, "loss": 1.533, "step": 16315 }, { "epoch": 0.5843107060361344, "grad_norm": 1.8997361660003662, "learning_rate": 7.773821397024584e-05, "loss": 1.6031, "step": 16316 }, { "epoch": 0.5843465181657027, "grad_norm": 1.708116054534912, "learning_rate": 7.77269061752199e-05, "loss": 1.4829, "step": 16317 }, { "epoch": 0.584382330295271, "grad_norm": 1.731094241142273, "learning_rate": 7.77155986798497e-05, "loss": 1.5349, "step": 16318 }, { "epoch": 0.5844181424248392, "grad_norm": 1.470812439918518, "learning_rate": 7.770429148428736e-05, "loss": 1.3997, "step": 16319 }, { "epoch": 0.5844539545544076, "grad_norm": 1.660354495048523, "learning_rate": 7.769298458868504e-05, "loss": 1.2777, "step": 16320 }, { "epoch": 0.5844897666839759, "grad_norm": 1.6110458374023438, "learning_rate": 7.768167799319481e-05, "loss": 1.3919, "step": 16321 }, { "epoch": 0.5845255788135442, "grad_norm": 1.8067439794540405, "learning_rate": 7.767037169796885e-05, "loss": 1.4326, "step": 16322 }, { "epoch": 0.5845613909431124, "grad_norm": 1.5376381874084473, "learning_rate": 7.76590657031592e-05, "loss": 1.5476, "step": 16323 }, { "epoch": 0.5845972030726807, "grad_norm": 2.144806385040283, "learning_rate": 7.764776000891805e-05, "loss": 1.2215, "step": 16324 }, { "epoch": 0.584633015202249, "grad_norm": 1.5610839128494263, "learning_rate": 7.763645461539741e-05, "loss": 1.0606, "step": 16325 }, { "epoch": 0.5846688273318172, "grad_norm": 1.6355888843536377, "learning_rate": 7.762514952274945e-05, "loss": 1.3254, "step": 16326 }, { "epoch": 0.5847046394613856, "grad_norm": 2.3415260314941406, "learning_rate": 7.761384473112625e-05, "loss": 1.5028, "step": 16327 }, { "epoch": 0.5847404515909539, "grad_norm": 1.3841601610183716, "learning_rate": 7.760254024067986e-05, "loss": 1.3328, "step": 16328 }, { "epoch": 0.5847762637205222, "grad_norm": 2.1620593070983887, "learning_rate": 7.759123605156243e-05, "loss": 1.6989, "step": 16329 }, { "epoch": 0.5848120758500904, "grad_norm": 1.456923007965088, "learning_rate": 7.757993216392599e-05, "loss": 1.5152, "step": 16330 }, { "epoch": 0.5848478879796587, "grad_norm": 1.7215697765350342, "learning_rate": 7.756862857792268e-05, "loss": 1.7743, "step": 16331 }, { "epoch": 0.584883700109227, "grad_norm": 1.5496634244918823, "learning_rate": 7.755732529370449e-05, "loss": 1.5552, "step": 16332 }, { "epoch": 0.5849195122387952, "grad_norm": 1.8300232887268066, "learning_rate": 7.754602231142359e-05, "loss": 1.7157, "step": 16333 }, { "epoch": 0.5849553243683636, "grad_norm": 1.5172995328903198, "learning_rate": 7.753471963123196e-05, "loss": 1.148, "step": 16334 }, { "epoch": 0.5849911364979319, "grad_norm": 1.6155294179916382, "learning_rate": 7.752341725328171e-05, "loss": 1.2817, "step": 16335 }, { "epoch": 0.5850269486275002, "grad_norm": 1.5804482698440552, "learning_rate": 7.751211517772491e-05, "loss": 1.3887, "step": 16336 }, { "epoch": 0.5850627607570684, "grad_norm": 1.509569525718689, "learning_rate": 7.750081340471355e-05, "loss": 1.3041, "step": 16337 }, { "epoch": 0.5850985728866367, "grad_norm": 1.4579132795333862, "learning_rate": 7.748951193439977e-05, "loss": 1.5923, "step": 16338 }, { "epoch": 0.585134385016205, "grad_norm": 1.461970567703247, "learning_rate": 7.747821076693551e-05, "loss": 1.3511, "step": 16339 }, { "epoch": 0.5851701971457732, "grad_norm": 1.2731819152832031, "learning_rate": 7.746690990247291e-05, "loss": 1.4716, "step": 16340 }, { "epoch": 0.5852060092753416, "grad_norm": 1.532819151878357, "learning_rate": 7.745560934116398e-05, "loss": 1.3444, "step": 16341 }, { "epoch": 0.5852418214049099, "grad_norm": 1.585973858833313, "learning_rate": 7.744430908316074e-05, "loss": 1.5642, "step": 16342 }, { "epoch": 0.5852776335344781, "grad_norm": 1.9086010456085205, "learning_rate": 7.743300912861525e-05, "loss": 1.3634, "step": 16343 }, { "epoch": 0.5853134456640464, "grad_norm": 2.089420795440674, "learning_rate": 7.742170947767945e-05, "loss": 1.5245, "step": 16344 }, { "epoch": 0.5853492577936147, "grad_norm": 1.5326855182647705, "learning_rate": 7.741041013050549e-05, "loss": 1.4089, "step": 16345 }, { "epoch": 0.585385069923183, "grad_norm": 1.6081247329711914, "learning_rate": 7.739911108724527e-05, "loss": 1.3786, "step": 16346 }, { "epoch": 0.5854208820527512, "grad_norm": 1.4763007164001465, "learning_rate": 7.73878123480509e-05, "loss": 1.3, "step": 16347 }, { "epoch": 0.5854566941823196, "grad_norm": 1.4239866733551025, "learning_rate": 7.73765139130743e-05, "loss": 1.4136, "step": 16348 }, { "epoch": 0.5854925063118879, "grad_norm": 1.4767128229141235, "learning_rate": 7.736521578246758e-05, "loss": 1.6589, "step": 16349 }, { "epoch": 0.5855283184414561, "grad_norm": 2.082230806350708, "learning_rate": 7.735391795638262e-05, "loss": 1.2585, "step": 16350 }, { "epoch": 0.5855641305710244, "grad_norm": 1.4269074201583862, "learning_rate": 7.734262043497155e-05, "loss": 1.7076, "step": 16351 }, { "epoch": 0.5855999427005927, "grad_norm": 1.9690130949020386, "learning_rate": 7.733132321838628e-05, "loss": 1.4339, "step": 16352 }, { "epoch": 0.585635754830161, "grad_norm": 1.7755242586135864, "learning_rate": 7.732002630677878e-05, "loss": 1.3142, "step": 16353 }, { "epoch": 0.5856715669597292, "grad_norm": 1.3469429016113281, "learning_rate": 7.73087297003011e-05, "loss": 1.725, "step": 16354 }, { "epoch": 0.5857073790892976, "grad_norm": 1.6812926530838013, "learning_rate": 7.729743339910515e-05, "loss": 1.1344, "step": 16355 }, { "epoch": 0.5857431912188659, "grad_norm": 1.4505376815795898, "learning_rate": 7.728613740334304e-05, "loss": 1.3823, "step": 16356 }, { "epoch": 0.5857790033484341, "grad_norm": 1.8146635293960571, "learning_rate": 7.727484171316655e-05, "loss": 1.3706, "step": 16357 }, { "epoch": 0.5858148154780024, "grad_norm": 1.5328073501586914, "learning_rate": 7.726354632872783e-05, "loss": 1.5141, "step": 16358 }, { "epoch": 0.5858506276075707, "grad_norm": 1.5390815734863281, "learning_rate": 7.72522512501787e-05, "loss": 1.3987, "step": 16359 }, { "epoch": 0.5858864397371389, "grad_norm": 1.7571913003921509, "learning_rate": 7.724095647767125e-05, "loss": 1.7021, "step": 16360 }, { "epoch": 0.5859222518667072, "grad_norm": 1.810835361480713, "learning_rate": 7.722966201135736e-05, "loss": 1.5146, "step": 16361 }, { "epoch": 0.5859580639962756, "grad_norm": 1.467986822128296, "learning_rate": 7.721836785138896e-05, "loss": 1.0774, "step": 16362 }, { "epoch": 0.5859938761258439, "grad_norm": 1.3759533166885376, "learning_rate": 7.720707399791807e-05, "loss": 1.5041, "step": 16363 }, { "epoch": 0.5860296882554121, "grad_norm": 1.5349520444869995, "learning_rate": 7.719578045109657e-05, "loss": 1.4431, "step": 16364 }, { "epoch": 0.5860655003849804, "grad_norm": 1.4117509126663208, "learning_rate": 7.718448721107645e-05, "loss": 1.1674, "step": 16365 }, { "epoch": 0.5861013125145487, "grad_norm": 1.8245066404342651, "learning_rate": 7.717319427800957e-05, "loss": 1.4755, "step": 16366 }, { "epoch": 0.5861371246441169, "grad_norm": 1.6505935192108154, "learning_rate": 7.7161901652048e-05, "loss": 1.2912, "step": 16367 }, { "epoch": 0.5861729367736852, "grad_norm": 1.3770312070846558, "learning_rate": 7.715060933334351e-05, "loss": 1.3058, "step": 16368 }, { "epoch": 0.5862087489032536, "grad_norm": 2.2501673698425293, "learning_rate": 7.713931732204816e-05, "loss": 1.6156, "step": 16369 }, { "epoch": 0.5862445610328219, "grad_norm": 1.9228209257125854, "learning_rate": 7.712802561831381e-05, "loss": 1.5591, "step": 16370 }, { "epoch": 0.5862803731623901, "grad_norm": 1.1532500982284546, "learning_rate": 7.711673422229232e-05, "loss": 1.4264, "step": 16371 }, { "epoch": 0.5863161852919584, "grad_norm": 1.937796711921692, "learning_rate": 7.71054431341357e-05, "loss": 1.4744, "step": 16372 }, { "epoch": 0.5863519974215267, "grad_norm": 1.4498701095581055, "learning_rate": 7.709415235399577e-05, "loss": 1.4989, "step": 16373 }, { "epoch": 0.5863878095510949, "grad_norm": 1.9454270601272583, "learning_rate": 7.708286188202451e-05, "loss": 1.521, "step": 16374 }, { "epoch": 0.5864236216806632, "grad_norm": 1.8580267429351807, "learning_rate": 7.707157171837374e-05, "loss": 1.3413, "step": 16375 }, { "epoch": 0.5864594338102316, "grad_norm": 2.0274364948272705, "learning_rate": 7.706028186319543e-05, "loss": 1.4014, "step": 16376 }, { "epoch": 0.5864952459397998, "grad_norm": 1.5042753219604492, "learning_rate": 7.704899231664143e-05, "loss": 1.5814, "step": 16377 }, { "epoch": 0.5865310580693681, "grad_norm": 1.5244011878967285, "learning_rate": 7.703770307886364e-05, "loss": 1.1341, "step": 16378 }, { "epoch": 0.5865668701989364, "grad_norm": 2.079533576965332, "learning_rate": 7.702641415001394e-05, "loss": 1.9471, "step": 16379 }, { "epoch": 0.5866026823285047, "grad_norm": 1.7344539165496826, "learning_rate": 7.701512553024418e-05, "loss": 1.3302, "step": 16380 }, { "epoch": 0.5866384944580729, "grad_norm": 1.6465486288070679, "learning_rate": 7.700383721970628e-05, "loss": 1.3835, "step": 16381 }, { "epoch": 0.5866743065876412, "grad_norm": 2.1004645824432373, "learning_rate": 7.699254921855206e-05, "loss": 1.6895, "step": 16382 }, { "epoch": 0.5867101187172096, "grad_norm": 1.8558729887008667, "learning_rate": 7.698126152693345e-05, "loss": 1.4859, "step": 16383 }, { "epoch": 0.5867459308467778, "grad_norm": 1.7315419912338257, "learning_rate": 7.696997414500223e-05, "loss": 1.3328, "step": 16384 }, { "epoch": 0.5867817429763461, "grad_norm": 2.36985182762146, "learning_rate": 7.695868707291034e-05, "loss": 1.4141, "step": 16385 }, { "epoch": 0.5868175551059144, "grad_norm": 1.4058644771575928, "learning_rate": 7.694740031080957e-05, "loss": 1.2073, "step": 16386 }, { "epoch": 0.5868533672354826, "grad_norm": 1.6645869016647339, "learning_rate": 7.693611385885181e-05, "loss": 1.488, "step": 16387 }, { "epoch": 0.5868891793650509, "grad_norm": 1.4396991729736328, "learning_rate": 7.69248277171889e-05, "loss": 1.5952, "step": 16388 }, { "epoch": 0.5869249914946192, "grad_norm": 2.9401330947875977, "learning_rate": 7.691354188597263e-05, "loss": 1.8068, "step": 16389 }, { "epoch": 0.5869608036241876, "grad_norm": 1.3495395183563232, "learning_rate": 7.69022563653549e-05, "loss": 1.2889, "step": 16390 }, { "epoch": 0.5869966157537558, "grad_norm": 1.5951523780822754, "learning_rate": 7.689097115548751e-05, "loss": 1.5938, "step": 16391 }, { "epoch": 0.5870324278833241, "grad_norm": 1.477667212486267, "learning_rate": 7.68796862565223e-05, "loss": 1.5373, "step": 16392 }, { "epoch": 0.5870682400128924, "grad_norm": 1.5856584310531616, "learning_rate": 7.686840166861106e-05, "loss": 1.3693, "step": 16393 }, { "epoch": 0.5871040521424606, "grad_norm": 1.5418838262557983, "learning_rate": 7.685711739190568e-05, "loss": 1.4448, "step": 16394 }, { "epoch": 0.5871398642720289, "grad_norm": 1.6027543544769287, "learning_rate": 7.684583342655791e-05, "loss": 1.5726, "step": 16395 }, { "epoch": 0.5871756764015972, "grad_norm": 1.5729893445968628, "learning_rate": 7.68345497727196e-05, "loss": 1.5488, "step": 16396 }, { "epoch": 0.5872114885311656, "grad_norm": 1.2920430898666382, "learning_rate": 7.682326643054254e-05, "loss": 1.663, "step": 16397 }, { "epoch": 0.5872473006607338, "grad_norm": 1.6387059688568115, "learning_rate": 7.681198340017852e-05, "loss": 1.3222, "step": 16398 }, { "epoch": 0.5872831127903021, "grad_norm": 1.7342307567596436, "learning_rate": 7.680070068177936e-05, "loss": 1.3333, "step": 16399 }, { "epoch": 0.5873189249198704, "grad_norm": 1.7177081108093262, "learning_rate": 7.678941827549683e-05, "loss": 1.5123, "step": 16400 }, { "epoch": 0.5873547370494386, "grad_norm": 1.5641347169876099, "learning_rate": 7.677813618148276e-05, "loss": 1.479, "step": 16401 }, { "epoch": 0.5873905491790069, "grad_norm": 1.8655952215194702, "learning_rate": 7.67668543998889e-05, "loss": 1.6996, "step": 16402 }, { "epoch": 0.5874263613085752, "grad_norm": 1.3110804557800293, "learning_rate": 7.675557293086706e-05, "loss": 1.4526, "step": 16403 }, { "epoch": 0.5874621734381436, "grad_norm": 1.8281629085540771, "learning_rate": 7.674429177456899e-05, "loss": 1.6103, "step": 16404 }, { "epoch": 0.5874979855677118, "grad_norm": 1.439719557762146, "learning_rate": 7.673301093114649e-05, "loss": 1.2931, "step": 16405 }, { "epoch": 0.5875337976972801, "grad_norm": 1.3842743635177612, "learning_rate": 7.672173040075131e-05, "loss": 1.3253, "step": 16406 }, { "epoch": 0.5875696098268484, "grad_norm": 1.4752776622772217, "learning_rate": 7.671045018353521e-05, "loss": 1.2556, "step": 16407 }, { "epoch": 0.5876054219564166, "grad_norm": 1.8519983291625977, "learning_rate": 7.669917027964998e-05, "loss": 1.3942, "step": 16408 }, { "epoch": 0.5876412340859849, "grad_norm": 1.96647047996521, "learning_rate": 7.668789068924734e-05, "loss": 1.4563, "step": 16409 }, { "epoch": 0.5876770462155532, "grad_norm": 1.5094881057739258, "learning_rate": 7.667661141247907e-05, "loss": 1.2547, "step": 16410 }, { "epoch": 0.5877128583451215, "grad_norm": 1.5891088247299194, "learning_rate": 7.66653324494969e-05, "loss": 1.5648, "step": 16411 }, { "epoch": 0.5877486704746898, "grad_norm": 1.6747032403945923, "learning_rate": 7.665405380045258e-05, "loss": 1.2975, "step": 16412 }, { "epoch": 0.5877844826042581, "grad_norm": 1.7869306802749634, "learning_rate": 7.664277546549786e-05, "loss": 1.7527, "step": 16413 }, { "epoch": 0.5878202947338264, "grad_norm": 2.474652051925659, "learning_rate": 7.663149744478448e-05, "loss": 1.7304, "step": 16414 }, { "epoch": 0.5878561068633946, "grad_norm": 1.5477724075317383, "learning_rate": 7.662021973846415e-05, "loss": 1.2809, "step": 16415 }, { "epoch": 0.5878919189929629, "grad_norm": 1.887107014656067, "learning_rate": 7.660894234668859e-05, "loss": 1.5836, "step": 16416 }, { "epoch": 0.5879277311225312, "grad_norm": 1.466249942779541, "learning_rate": 7.659766526960957e-05, "loss": 1.607, "step": 16417 }, { "epoch": 0.5879635432520995, "grad_norm": 1.4023653268814087, "learning_rate": 7.658638850737874e-05, "loss": 1.0977, "step": 16418 }, { "epoch": 0.5879993553816678, "grad_norm": 1.5768914222717285, "learning_rate": 7.657511206014788e-05, "loss": 1.587, "step": 16419 }, { "epoch": 0.5880351675112361, "grad_norm": 1.5604252815246582, "learning_rate": 7.656383592806865e-05, "loss": 1.4809, "step": 16420 }, { "epoch": 0.5880709796408043, "grad_norm": 1.8900164365768433, "learning_rate": 7.655256011129279e-05, "loss": 1.368, "step": 16421 }, { "epoch": 0.5881067917703726, "grad_norm": 1.759080171585083, "learning_rate": 7.654128460997198e-05, "loss": 1.4159, "step": 16422 }, { "epoch": 0.5881426038999409, "grad_norm": 2.4119346141815186, "learning_rate": 7.653000942425794e-05, "loss": 1.3701, "step": 16423 }, { "epoch": 0.5881784160295092, "grad_norm": 1.836087703704834, "learning_rate": 7.651873455430237e-05, "loss": 1.5565, "step": 16424 }, { "epoch": 0.5882142281590775, "grad_norm": 1.5751034021377563, "learning_rate": 7.650746000025688e-05, "loss": 1.6149, "step": 16425 }, { "epoch": 0.5882500402886458, "grad_norm": 2.3002188205718994, "learning_rate": 7.649618576227325e-05, "loss": 1.5969, "step": 16426 }, { "epoch": 0.5882858524182141, "grad_norm": 1.5687283277511597, "learning_rate": 7.648491184050311e-05, "loss": 1.6116, "step": 16427 }, { "epoch": 0.5883216645477823, "grad_norm": 1.529663324356079, "learning_rate": 7.647363823509815e-05, "loss": 1.1357, "step": 16428 }, { "epoch": 0.5883574766773506, "grad_norm": 1.6907438039779663, "learning_rate": 7.646236494621004e-05, "loss": 1.4858, "step": 16429 }, { "epoch": 0.5883932888069189, "grad_norm": 1.6002248525619507, "learning_rate": 7.645109197399047e-05, "loss": 1.3588, "step": 16430 }, { "epoch": 0.5884291009364871, "grad_norm": 1.6536250114440918, "learning_rate": 7.643981931859104e-05, "loss": 1.396, "step": 16431 }, { "epoch": 0.5884649130660555, "grad_norm": 1.6143574714660645, "learning_rate": 7.642854698016348e-05, "loss": 1.3903, "step": 16432 }, { "epoch": 0.5885007251956238, "grad_norm": 1.4766873121261597, "learning_rate": 7.641727495885944e-05, "loss": 1.4478, "step": 16433 }, { "epoch": 0.5885365373251921, "grad_norm": 1.8612972497940063, "learning_rate": 7.640600325483049e-05, "loss": 1.4193, "step": 16434 }, { "epoch": 0.5885723494547603, "grad_norm": 1.6533550024032593, "learning_rate": 7.639473186822839e-05, "loss": 1.6117, "step": 16435 }, { "epoch": 0.5886081615843286, "grad_norm": 1.6703141927719116, "learning_rate": 7.638346079920466e-05, "loss": 1.782, "step": 16436 }, { "epoch": 0.5886439737138969, "grad_norm": 1.5920336246490479, "learning_rate": 7.637219004791106e-05, "loss": 1.3684, "step": 16437 }, { "epoch": 0.5886797858434651, "grad_norm": 1.2391998767852783, "learning_rate": 7.636091961449911e-05, "loss": 1.4898, "step": 16438 }, { "epoch": 0.5887155979730334, "grad_norm": 1.9100953340530396, "learning_rate": 7.634964949912054e-05, "loss": 1.7013, "step": 16439 }, { "epoch": 0.5887514101026018, "grad_norm": 2.002978563308716, "learning_rate": 7.633837970192694e-05, "loss": 1.6605, "step": 16440 }, { "epoch": 0.5887872222321701, "grad_norm": 2.9292209148406982, "learning_rate": 7.632711022306985e-05, "loss": 1.3659, "step": 16441 }, { "epoch": 0.5888230343617383, "grad_norm": 2.129926919937134, "learning_rate": 7.631584106270103e-05, "loss": 1.3669, "step": 16442 }, { "epoch": 0.5888588464913066, "grad_norm": 1.524235486984253, "learning_rate": 7.630457222097196e-05, "loss": 1.3326, "step": 16443 }, { "epoch": 0.5888946586208749, "grad_norm": 1.5463957786560059, "learning_rate": 7.629330369803435e-05, "loss": 1.3502, "step": 16444 }, { "epoch": 0.5889304707504431, "grad_norm": 1.6512547731399536, "learning_rate": 7.628203549403971e-05, "loss": 1.7101, "step": 16445 }, { "epoch": 0.5889662828800114, "grad_norm": 2.546053171157837, "learning_rate": 7.627076760913976e-05, "loss": 1.4441, "step": 16446 }, { "epoch": 0.5890020950095798, "grad_norm": 1.6109521389007568, "learning_rate": 7.625950004348595e-05, "loss": 1.4997, "step": 16447 }, { "epoch": 0.589037907139148, "grad_norm": 1.9896047115325928, "learning_rate": 7.624823279723001e-05, "loss": 1.3661, "step": 16448 }, { "epoch": 0.5890737192687163, "grad_norm": 1.843091368675232, "learning_rate": 7.623696587052343e-05, "loss": 1.5451, "step": 16449 }, { "epoch": 0.5891095313982846, "grad_norm": 2.0600764751434326, "learning_rate": 7.622569926351781e-05, "loss": 1.4128, "step": 16450 }, { "epoch": 0.5891453435278529, "grad_norm": 1.6426974534988403, "learning_rate": 7.621443297636478e-05, "loss": 1.5547, "step": 16451 }, { "epoch": 0.5891811556574211, "grad_norm": 1.6177315711975098, "learning_rate": 7.62031670092158e-05, "loss": 1.3688, "step": 16452 }, { "epoch": 0.5892169677869894, "grad_norm": 1.4145938158035278, "learning_rate": 7.619190136222259e-05, "loss": 1.3171, "step": 16453 }, { "epoch": 0.5892527799165578, "grad_norm": 2.2485506534576416, "learning_rate": 7.618063603553655e-05, "loss": 1.3094, "step": 16454 }, { "epoch": 0.589288592046126, "grad_norm": 1.2966976165771484, "learning_rate": 7.616937102930942e-05, "loss": 1.4065, "step": 16455 }, { "epoch": 0.5893244041756943, "grad_norm": 1.8887548446655273, "learning_rate": 7.61581063436926e-05, "loss": 1.3098, "step": 16456 }, { "epoch": 0.5893602163052626, "grad_norm": 1.525266408920288, "learning_rate": 7.614684197883775e-05, "loss": 1.3332, "step": 16457 }, { "epoch": 0.5893960284348309, "grad_norm": 2.2836077213287354, "learning_rate": 7.613557793489637e-05, "loss": 1.5326, "step": 16458 }, { "epoch": 0.5894318405643991, "grad_norm": 1.719794511795044, "learning_rate": 7.612431421201996e-05, "loss": 1.4174, "step": 16459 }, { "epoch": 0.5894676526939674, "grad_norm": 1.5337207317352295, "learning_rate": 7.611305081036015e-05, "loss": 1.1962, "step": 16460 }, { "epoch": 0.5895034648235358, "grad_norm": 1.6725982427597046, "learning_rate": 7.61017877300684e-05, "loss": 1.4057, "step": 16461 }, { "epoch": 0.589539276953104, "grad_norm": 1.944868564605713, "learning_rate": 7.609052497129629e-05, "loss": 1.4168, "step": 16462 }, { "epoch": 0.5895750890826723, "grad_norm": 1.736919641494751, "learning_rate": 7.607926253419531e-05, "loss": 1.619, "step": 16463 }, { "epoch": 0.5896109012122406, "grad_norm": 1.4023191928863525, "learning_rate": 7.606800041891701e-05, "loss": 1.5406, "step": 16464 }, { "epoch": 0.5896467133418088, "grad_norm": 2.086000919342041, "learning_rate": 7.605673862561284e-05, "loss": 1.5427, "step": 16465 }, { "epoch": 0.5896825254713771, "grad_norm": 1.7645149230957031, "learning_rate": 7.604547715443445e-05, "loss": 1.5254, "step": 16466 }, { "epoch": 0.5897183376009454, "grad_norm": 1.2208935022354126, "learning_rate": 7.603421600553324e-05, "loss": 1.5195, "step": 16467 }, { "epoch": 0.5897541497305138, "grad_norm": 1.5771421194076538, "learning_rate": 7.602295517906072e-05, "loss": 1.5837, "step": 16468 }, { "epoch": 0.589789961860082, "grad_norm": 1.4784822463989258, "learning_rate": 7.601169467516844e-05, "loss": 1.4173, "step": 16469 }, { "epoch": 0.5898257739896503, "grad_norm": 1.6251667737960815, "learning_rate": 7.600043449400782e-05, "loss": 1.6069, "step": 16470 }, { "epoch": 0.5898615861192186, "grad_norm": 1.9627031087875366, "learning_rate": 7.598917463573044e-05, "loss": 1.801, "step": 16471 }, { "epoch": 0.5898973982487868, "grad_norm": 2.1740000247955322, "learning_rate": 7.59779151004877e-05, "loss": 1.352, "step": 16472 }, { "epoch": 0.5899332103783551, "grad_norm": 1.7333546876907349, "learning_rate": 7.596665588843117e-05, "loss": 1.5806, "step": 16473 }, { "epoch": 0.5899690225079234, "grad_norm": 1.9666831493377686, "learning_rate": 7.595539699971225e-05, "loss": 1.4948, "step": 16474 }, { "epoch": 0.5900048346374918, "grad_norm": 1.692284345626831, "learning_rate": 7.594413843448248e-05, "loss": 1.4834, "step": 16475 }, { "epoch": 0.59004064676706, "grad_norm": 1.3781365156173706, "learning_rate": 7.593288019289329e-05, "loss": 1.3872, "step": 16476 }, { "epoch": 0.5900764588966283, "grad_norm": 1.7096667289733887, "learning_rate": 7.592162227509614e-05, "loss": 1.4666, "step": 16477 }, { "epoch": 0.5901122710261966, "grad_norm": 1.9078587293624878, "learning_rate": 7.591036468124252e-05, "loss": 1.5795, "step": 16478 }, { "epoch": 0.5901480831557648, "grad_norm": 1.6421924829483032, "learning_rate": 7.589910741148384e-05, "loss": 1.2502, "step": 16479 }, { "epoch": 0.5901838952853331, "grad_norm": 1.5889708995819092, "learning_rate": 7.588785046597161e-05, "loss": 1.4928, "step": 16480 }, { "epoch": 0.5902197074149014, "grad_norm": 2.91912841796875, "learning_rate": 7.587659384485723e-05, "loss": 1.848, "step": 16481 }, { "epoch": 0.5902555195444698, "grad_norm": 1.248953938484192, "learning_rate": 7.586533754829218e-05, "loss": 1.4012, "step": 16482 }, { "epoch": 0.590291331674038, "grad_norm": 1.8123250007629395, "learning_rate": 7.585408157642786e-05, "loss": 1.2366, "step": 16483 }, { "epoch": 0.5903271438036063, "grad_norm": 1.7613189220428467, "learning_rate": 7.584282592941574e-05, "loss": 1.1722, "step": 16484 }, { "epoch": 0.5903629559331746, "grad_norm": 1.6711210012435913, "learning_rate": 7.583157060740727e-05, "loss": 1.635, "step": 16485 }, { "epoch": 0.5903987680627428, "grad_norm": 1.593625783920288, "learning_rate": 7.582031561055378e-05, "loss": 1.5147, "step": 16486 }, { "epoch": 0.5904345801923111, "grad_norm": 1.475412130355835, "learning_rate": 7.58090609390068e-05, "loss": 1.5504, "step": 16487 }, { "epoch": 0.5904703923218794, "grad_norm": 2.0667693614959717, "learning_rate": 7.579780659291768e-05, "loss": 1.4428, "step": 16488 }, { "epoch": 0.5905062044514477, "grad_norm": 1.4998021125793457, "learning_rate": 7.578655257243786e-05, "loss": 1.5839, "step": 16489 }, { "epoch": 0.590542016581016, "grad_norm": 1.578344702720642, "learning_rate": 7.577529887771873e-05, "loss": 1.4212, "step": 16490 }, { "epoch": 0.5905778287105843, "grad_norm": 1.7631887197494507, "learning_rate": 7.576404550891172e-05, "loss": 1.5553, "step": 16491 }, { "epoch": 0.5906136408401526, "grad_norm": 1.7021032571792603, "learning_rate": 7.57527924661682e-05, "loss": 1.458, "step": 16492 }, { "epoch": 0.5906494529697208, "grad_norm": 1.9911140203475952, "learning_rate": 7.57415397496396e-05, "loss": 1.699, "step": 16493 }, { "epoch": 0.5906852650992891, "grad_norm": 1.4853942394256592, "learning_rate": 7.57302873594773e-05, "loss": 1.3604, "step": 16494 }, { "epoch": 0.5907210772288574, "grad_norm": 1.4939459562301636, "learning_rate": 7.571903529583265e-05, "loss": 1.4661, "step": 16495 }, { "epoch": 0.5907568893584257, "grad_norm": 1.3259398937225342, "learning_rate": 7.570778355885708e-05, "loss": 1.5328, "step": 16496 }, { "epoch": 0.590792701487994, "grad_norm": 1.5033601522445679, "learning_rate": 7.569653214870192e-05, "loss": 1.6475, "step": 16497 }, { "epoch": 0.5908285136175623, "grad_norm": 1.707831621170044, "learning_rate": 7.568528106551862e-05, "loss": 1.1923, "step": 16498 }, { "epoch": 0.5908643257471305, "grad_norm": 2.281423807144165, "learning_rate": 7.567403030945844e-05, "loss": 1.4786, "step": 16499 }, { "epoch": 0.5909001378766988, "grad_norm": 1.7174972295761108, "learning_rate": 7.566277988067285e-05, "loss": 1.4596, "step": 16500 }, { "epoch": 0.5909359500062671, "grad_norm": 1.3764506578445435, "learning_rate": 7.565152977931314e-05, "loss": 1.4684, "step": 16501 }, { "epoch": 0.5909717621358354, "grad_norm": 2.216895818710327, "learning_rate": 7.56402800055307e-05, "loss": 1.5763, "step": 16502 }, { "epoch": 0.5910075742654037, "grad_norm": 1.5239827632904053, "learning_rate": 7.562903055947688e-05, "loss": 1.2643, "step": 16503 }, { "epoch": 0.591043386394972, "grad_norm": 1.4618558883666992, "learning_rate": 7.561778144130299e-05, "loss": 1.4947, "step": 16504 }, { "epoch": 0.5910791985245403, "grad_norm": 1.6704903841018677, "learning_rate": 7.560653265116042e-05, "loss": 1.4775, "step": 16505 }, { "epoch": 0.5911150106541085, "grad_norm": 1.5482014417648315, "learning_rate": 7.559528418920048e-05, "loss": 1.3616, "step": 16506 }, { "epoch": 0.5911508227836768, "grad_norm": 1.870289921760559, "learning_rate": 7.558403605557453e-05, "loss": 1.4348, "step": 16507 }, { "epoch": 0.5911866349132451, "grad_norm": 1.4968332052230835, "learning_rate": 7.557278825043385e-05, "loss": 1.3695, "step": 16508 }, { "epoch": 0.5912224470428133, "grad_norm": 1.5539411306381226, "learning_rate": 7.556154077392982e-05, "loss": 1.3545, "step": 16509 }, { "epoch": 0.5912582591723817, "grad_norm": 1.5246566534042358, "learning_rate": 7.555029362621371e-05, "loss": 1.4596, "step": 16510 }, { "epoch": 0.59129407130195, "grad_norm": 1.3691539764404297, "learning_rate": 7.553904680743688e-05, "loss": 1.4245, "step": 16511 }, { "epoch": 0.5913298834315183, "grad_norm": 1.5674803256988525, "learning_rate": 7.552780031775064e-05, "loss": 1.3345, "step": 16512 }, { "epoch": 0.5913656955610865, "grad_norm": 1.5378525257110596, "learning_rate": 7.551655415730624e-05, "loss": 1.5885, "step": 16513 }, { "epoch": 0.5914015076906548, "grad_norm": 1.648198127746582, "learning_rate": 7.550530832625505e-05, "loss": 1.5619, "step": 16514 }, { "epoch": 0.5914373198202231, "grad_norm": 2.732581615447998, "learning_rate": 7.549406282474833e-05, "loss": 2.0635, "step": 16515 }, { "epoch": 0.5914731319497913, "grad_norm": 1.5190062522888184, "learning_rate": 7.548281765293739e-05, "loss": 1.583, "step": 16516 }, { "epoch": 0.5915089440793597, "grad_norm": 1.6483665704727173, "learning_rate": 7.54715728109735e-05, "loss": 1.2961, "step": 16517 }, { "epoch": 0.591544756208928, "grad_norm": 1.692216396331787, "learning_rate": 7.546032829900797e-05, "loss": 1.451, "step": 16518 }, { "epoch": 0.5915805683384963, "grad_norm": 1.4392499923706055, "learning_rate": 7.544908411719207e-05, "loss": 1.3544, "step": 16519 }, { "epoch": 0.5916163804680645, "grad_norm": 1.5235916376113892, "learning_rate": 7.543784026567708e-05, "loss": 1.4944, "step": 16520 }, { "epoch": 0.5916521925976328, "grad_norm": 1.9779636859893799, "learning_rate": 7.542659674461429e-05, "loss": 1.3904, "step": 16521 }, { "epoch": 0.5916880047272011, "grad_norm": 1.3239980936050415, "learning_rate": 7.541535355415487e-05, "loss": 1.524, "step": 16522 }, { "epoch": 0.5917238168567693, "grad_norm": 1.3218424320220947, "learning_rate": 7.540411069445021e-05, "loss": 1.4584, "step": 16523 }, { "epoch": 0.5917596289863377, "grad_norm": 1.6652659177780151, "learning_rate": 7.53928681656515e-05, "loss": 1.1819, "step": 16524 }, { "epoch": 0.591795441115906, "grad_norm": 1.502694010734558, "learning_rate": 7.538162596791002e-05, "loss": 1.8572, "step": 16525 }, { "epoch": 0.5918312532454743, "grad_norm": 1.5385327339172363, "learning_rate": 7.537038410137698e-05, "loss": 1.2326, "step": 16526 }, { "epoch": 0.5918670653750425, "grad_norm": 1.5926456451416016, "learning_rate": 7.535914256620368e-05, "loss": 1.6148, "step": 16527 }, { "epoch": 0.5919028775046108, "grad_norm": 1.8524401187896729, "learning_rate": 7.534790136254132e-05, "loss": 1.1339, "step": 16528 }, { "epoch": 0.5919386896341791, "grad_norm": 1.430527687072754, "learning_rate": 7.533666049054115e-05, "loss": 1.4224, "step": 16529 }, { "epoch": 0.5919745017637473, "grad_norm": 1.5670427083969116, "learning_rate": 7.532541995035444e-05, "loss": 1.6936, "step": 16530 }, { "epoch": 0.5920103138933157, "grad_norm": 1.8079187870025635, "learning_rate": 7.53141797421323e-05, "loss": 1.2269, "step": 16531 }, { "epoch": 0.592046126022884, "grad_norm": 1.840510368347168, "learning_rate": 7.53029398660261e-05, "loss": 1.5701, "step": 16532 }, { "epoch": 0.5920819381524522, "grad_norm": 1.3353474140167236, "learning_rate": 7.529170032218691e-05, "loss": 1.6003, "step": 16533 }, { "epoch": 0.5921177502820205, "grad_norm": 2.0196423530578613, "learning_rate": 7.52804611107661e-05, "loss": 1.605, "step": 16534 }, { "epoch": 0.5921535624115888, "grad_norm": 1.7749592065811157, "learning_rate": 7.526922223191473e-05, "loss": 1.8357, "step": 16535 }, { "epoch": 0.592189374541157, "grad_norm": 1.9717570543289185, "learning_rate": 7.525798368578412e-05, "loss": 1.4984, "step": 16536 }, { "epoch": 0.5922251866707253, "grad_norm": 1.611171841621399, "learning_rate": 7.524674547252544e-05, "loss": 1.3366, "step": 16537 }, { "epoch": 0.5922609988002937, "grad_norm": 1.4326066970825195, "learning_rate": 7.523550759228981e-05, "loss": 1.5233, "step": 16538 }, { "epoch": 0.592296810929862, "grad_norm": 1.420731782913208, "learning_rate": 7.522427004522855e-05, "loss": 1.4304, "step": 16539 }, { "epoch": 0.5923326230594302, "grad_norm": 2.4821457862854004, "learning_rate": 7.52130328314927e-05, "loss": 1.534, "step": 16540 }, { "epoch": 0.5923684351889985, "grad_norm": 1.7809147834777832, "learning_rate": 7.52017959512336e-05, "loss": 1.3436, "step": 16541 }, { "epoch": 0.5924042473185668, "grad_norm": 1.487674593925476, "learning_rate": 7.519055940460227e-05, "loss": 1.3221, "step": 16542 }, { "epoch": 0.592440059448135, "grad_norm": 1.630618929862976, "learning_rate": 7.517932319175003e-05, "loss": 1.391, "step": 16543 }, { "epoch": 0.5924758715777033, "grad_norm": 1.7148510217666626, "learning_rate": 7.516808731282793e-05, "loss": 1.6317, "step": 16544 }, { "epoch": 0.5925116837072717, "grad_norm": 1.6634539365768433, "learning_rate": 7.515685176798723e-05, "loss": 1.8021, "step": 16545 }, { "epoch": 0.59254749583684, "grad_norm": 1.7055913209915161, "learning_rate": 7.514561655737904e-05, "loss": 1.7112, "step": 16546 }, { "epoch": 0.5925833079664082, "grad_norm": 1.6069657802581787, "learning_rate": 7.513438168115449e-05, "loss": 1.4381, "step": 16547 }, { "epoch": 0.5926191200959765, "grad_norm": 1.99888277053833, "learning_rate": 7.512314713946478e-05, "loss": 1.677, "step": 16548 }, { "epoch": 0.5926549322255448, "grad_norm": 1.9482409954071045, "learning_rate": 7.5111912932461e-05, "loss": 1.0121, "step": 16549 }, { "epoch": 0.592690744355113, "grad_norm": 1.8642888069152832, "learning_rate": 7.510067906029437e-05, "loss": 1.5993, "step": 16550 }, { "epoch": 0.5927265564846813, "grad_norm": 1.965591311454773, "learning_rate": 7.508944552311594e-05, "loss": 1.4873, "step": 16551 }, { "epoch": 0.5927623686142497, "grad_norm": 1.6098365783691406, "learning_rate": 7.507821232107695e-05, "loss": 1.433, "step": 16552 }, { "epoch": 0.592798180743818, "grad_norm": 1.4152309894561768, "learning_rate": 7.506697945432841e-05, "loss": 1.7259, "step": 16553 }, { "epoch": 0.5928339928733862, "grad_norm": 1.5540013313293457, "learning_rate": 7.505574692302155e-05, "loss": 1.408, "step": 16554 }, { "epoch": 0.5928698050029545, "grad_norm": 1.8713033199310303, "learning_rate": 7.504451472730743e-05, "loss": 1.4247, "step": 16555 }, { "epoch": 0.5929056171325228, "grad_norm": 1.4902887344360352, "learning_rate": 7.503328286733715e-05, "loss": 1.4305, "step": 16556 }, { "epoch": 0.592941429262091, "grad_norm": 2.143108367919922, "learning_rate": 7.502205134326185e-05, "loss": 1.4066, "step": 16557 }, { "epoch": 0.5929772413916593, "grad_norm": 2.025181770324707, "learning_rate": 7.501082015523263e-05, "loss": 1.6475, "step": 16558 }, { "epoch": 0.5930130535212277, "grad_norm": 2.7611522674560547, "learning_rate": 7.499958930340061e-05, "loss": 1.6847, "step": 16559 }, { "epoch": 0.593048865650796, "grad_norm": 1.5839300155639648, "learning_rate": 7.498835878791684e-05, "loss": 1.6008, "step": 16560 }, { "epoch": 0.5930846777803642, "grad_norm": 1.7940725088119507, "learning_rate": 7.497712860893245e-05, "loss": 1.2224, "step": 16561 }, { "epoch": 0.5931204899099325, "grad_norm": 1.4097790718078613, "learning_rate": 7.49658987665985e-05, "loss": 1.5644, "step": 16562 }, { "epoch": 0.5931563020395008, "grad_norm": 1.7357324361801147, "learning_rate": 7.495466926106614e-05, "loss": 1.3663, "step": 16563 }, { "epoch": 0.593192114169069, "grad_norm": 2.125781774520874, "learning_rate": 7.494344009248637e-05, "loss": 1.6129, "step": 16564 }, { "epoch": 0.5932279262986373, "grad_norm": 1.732550024986267, "learning_rate": 7.493221126101028e-05, "loss": 1.6356, "step": 16565 }, { "epoch": 0.5932637384282057, "grad_norm": 1.9407490491867065, "learning_rate": 7.492098276678898e-05, "loss": 1.3264, "step": 16566 }, { "epoch": 0.593299550557774, "grad_norm": 1.5045716762542725, "learning_rate": 7.490975460997348e-05, "loss": 1.1289, "step": 16567 }, { "epoch": 0.5933353626873422, "grad_norm": 1.425619125366211, "learning_rate": 7.489852679071488e-05, "loss": 1.4394, "step": 16568 }, { "epoch": 0.5933711748169105, "grad_norm": 2.01013445854187, "learning_rate": 7.488729930916421e-05, "loss": 1.4089, "step": 16569 }, { "epoch": 0.5934069869464788, "grad_norm": 1.647908329963684, "learning_rate": 7.487607216547255e-05, "loss": 1.6508, "step": 16570 }, { "epoch": 0.593442799076047, "grad_norm": 1.4094040393829346, "learning_rate": 7.486484535979092e-05, "loss": 1.5401, "step": 16571 }, { "epoch": 0.5934786112056153, "grad_norm": 1.2045484781265259, "learning_rate": 7.485361889227038e-05, "loss": 1.2202, "step": 16572 }, { "epoch": 0.5935144233351837, "grad_norm": 1.9746378660202026, "learning_rate": 7.484239276306198e-05, "loss": 1.4754, "step": 16573 }, { "epoch": 0.5935502354647519, "grad_norm": 1.6177870035171509, "learning_rate": 7.483116697231671e-05, "loss": 1.2873, "step": 16574 }, { "epoch": 0.5935860475943202, "grad_norm": 2.0700132846832275, "learning_rate": 7.481994152018563e-05, "loss": 1.1141, "step": 16575 }, { "epoch": 0.5936218597238885, "grad_norm": 1.6111782789230347, "learning_rate": 7.480871640681975e-05, "loss": 1.4394, "step": 16576 }, { "epoch": 0.5936576718534567, "grad_norm": 1.553205966949463, "learning_rate": 7.479749163237012e-05, "loss": 1.3981, "step": 16577 }, { "epoch": 0.593693483983025, "grad_norm": 1.7631208896636963, "learning_rate": 7.47862671969877e-05, "loss": 1.7236, "step": 16578 }, { "epoch": 0.5937292961125933, "grad_norm": 1.4806108474731445, "learning_rate": 7.477504310082354e-05, "loss": 1.5397, "step": 16579 }, { "epoch": 0.5937651082421617, "grad_norm": 1.9398987293243408, "learning_rate": 7.476381934402865e-05, "loss": 1.2193, "step": 16580 }, { "epoch": 0.5938009203717299, "grad_norm": 1.654647946357727, "learning_rate": 7.475259592675402e-05, "loss": 1.4312, "step": 16581 }, { "epoch": 0.5938367325012982, "grad_norm": 1.9441183805465698, "learning_rate": 7.474137284915065e-05, "loss": 1.4922, "step": 16582 }, { "epoch": 0.5938725446308665, "grad_norm": 2.232104539871216, "learning_rate": 7.47301501113695e-05, "loss": 1.2633, "step": 16583 }, { "epoch": 0.5939083567604347, "grad_norm": 1.7381802797317505, "learning_rate": 7.471892771356161e-05, "loss": 1.4915, "step": 16584 }, { "epoch": 0.593944168890003, "grad_norm": 1.7541508674621582, "learning_rate": 7.470770565587792e-05, "loss": 1.4452, "step": 16585 }, { "epoch": 0.5939799810195713, "grad_norm": 1.4216136932373047, "learning_rate": 7.469648393846943e-05, "loss": 1.3694, "step": 16586 }, { "epoch": 0.5940157931491397, "grad_norm": 1.4462414979934692, "learning_rate": 7.46852625614871e-05, "loss": 1.5004, "step": 16587 }, { "epoch": 0.5940516052787079, "grad_norm": 1.6865832805633545, "learning_rate": 7.467404152508193e-05, "loss": 1.8021, "step": 16588 }, { "epoch": 0.5940874174082762, "grad_norm": 2.2418625354766846, "learning_rate": 7.466282082940484e-05, "loss": 1.493, "step": 16589 }, { "epoch": 0.5941232295378445, "grad_norm": 2.0363099575042725, "learning_rate": 7.465160047460685e-05, "loss": 1.7929, "step": 16590 }, { "epoch": 0.5941590416674127, "grad_norm": 1.8767224550247192, "learning_rate": 7.464038046083885e-05, "loss": 1.5985, "step": 16591 }, { "epoch": 0.594194853796981, "grad_norm": 1.5032994747161865, "learning_rate": 7.462916078825182e-05, "loss": 1.4272, "step": 16592 }, { "epoch": 0.5942306659265493, "grad_norm": 1.4486925601959229, "learning_rate": 7.46179414569967e-05, "loss": 1.4855, "step": 16593 }, { "epoch": 0.5942664780561177, "grad_norm": 1.592706561088562, "learning_rate": 7.460672246722444e-05, "loss": 1.7044, "step": 16594 }, { "epoch": 0.5943022901856859, "grad_norm": 1.9221786260604858, "learning_rate": 7.4595503819086e-05, "loss": 1.2772, "step": 16595 }, { "epoch": 0.5943381023152542, "grad_norm": 1.6944142580032349, "learning_rate": 7.458428551273226e-05, "loss": 1.4654, "step": 16596 }, { "epoch": 0.5943739144448225, "grad_norm": 2.0230796337127686, "learning_rate": 7.45730675483142e-05, "loss": 1.3047, "step": 16597 }, { "epoch": 0.5944097265743907, "grad_norm": 1.6869089603424072, "learning_rate": 7.456184992598267e-05, "loss": 1.5513, "step": 16598 }, { "epoch": 0.594445538703959, "grad_norm": 1.6149792671203613, "learning_rate": 7.455063264588869e-05, "loss": 1.4627, "step": 16599 }, { "epoch": 0.5944813508335273, "grad_norm": 1.7246642112731934, "learning_rate": 7.453941570818309e-05, "loss": 1.502, "step": 16600 }, { "epoch": 0.5945171629630956, "grad_norm": 1.4698948860168457, "learning_rate": 7.452819911301681e-05, "loss": 1.1747, "step": 16601 }, { "epoch": 0.5945529750926639, "grad_norm": 2.224147081375122, "learning_rate": 7.451698286054076e-05, "loss": 1.7846, "step": 16602 }, { "epoch": 0.5945887872222322, "grad_norm": 1.9010647535324097, "learning_rate": 7.450576695090583e-05, "loss": 1.1994, "step": 16603 }, { "epoch": 0.5946245993518005, "grad_norm": 1.8986421823501587, "learning_rate": 7.449455138426294e-05, "loss": 1.392, "step": 16604 }, { "epoch": 0.5946604114813687, "grad_norm": 2.1596598625183105, "learning_rate": 7.448333616076293e-05, "loss": 1.5052, "step": 16605 }, { "epoch": 0.594696223610937, "grad_norm": 2.1113686561584473, "learning_rate": 7.447212128055675e-05, "loss": 1.2468, "step": 16606 }, { "epoch": 0.5947320357405053, "grad_norm": 1.4451698064804077, "learning_rate": 7.446090674379522e-05, "loss": 1.6236, "step": 16607 }, { "epoch": 0.5947678478700736, "grad_norm": 2.0924429893493652, "learning_rate": 7.444969255062928e-05, "loss": 1.4995, "step": 16608 }, { "epoch": 0.5948036599996419, "grad_norm": 1.6229060888290405, "learning_rate": 7.443847870120976e-05, "loss": 1.452, "step": 16609 }, { "epoch": 0.5948394721292102, "grad_norm": 1.5565780401229858, "learning_rate": 7.442726519568751e-05, "loss": 1.4328, "step": 16610 }, { "epoch": 0.5948752842587784, "grad_norm": 1.7966111898422241, "learning_rate": 7.441605203421345e-05, "loss": 1.5491, "step": 16611 }, { "epoch": 0.5949110963883467, "grad_norm": 1.5467952489852905, "learning_rate": 7.440483921693839e-05, "loss": 1.5181, "step": 16612 }, { "epoch": 0.594946908517915, "grad_norm": 1.3843365907669067, "learning_rate": 7.439362674401322e-05, "loss": 1.3912, "step": 16613 }, { "epoch": 0.5949827206474833, "grad_norm": 1.6058661937713623, "learning_rate": 7.438241461558875e-05, "loss": 1.5003, "step": 16614 }, { "epoch": 0.5950185327770516, "grad_norm": 1.2036961317062378, "learning_rate": 7.437120283181586e-05, "loss": 1.3581, "step": 16615 }, { "epoch": 0.5950543449066199, "grad_norm": 1.3170970678329468, "learning_rate": 7.435999139284538e-05, "loss": 1.4601, "step": 16616 }, { "epoch": 0.5950901570361882, "grad_norm": 2.2052175998687744, "learning_rate": 7.434878029882814e-05, "loss": 1.5695, "step": 16617 }, { "epoch": 0.5951259691657564, "grad_norm": 1.6902546882629395, "learning_rate": 7.433756954991499e-05, "loss": 1.9375, "step": 16618 }, { "epoch": 0.5951617812953247, "grad_norm": 1.6723322868347168, "learning_rate": 7.43263591462567e-05, "loss": 1.5111, "step": 16619 }, { "epoch": 0.595197593424893, "grad_norm": 1.820304274559021, "learning_rate": 7.431514908800417e-05, "loss": 1.4817, "step": 16620 }, { "epoch": 0.5952334055544612, "grad_norm": 2.129159927368164, "learning_rate": 7.430393937530815e-05, "loss": 1.5987, "step": 16621 }, { "epoch": 0.5952692176840296, "grad_norm": 1.5790053606033325, "learning_rate": 7.429273000831949e-05, "loss": 1.5129, "step": 16622 }, { "epoch": 0.5953050298135979, "grad_norm": 1.2789185047149658, "learning_rate": 7.4281520987189e-05, "loss": 1.4375, "step": 16623 }, { "epoch": 0.5953408419431662, "grad_norm": 1.4424225091934204, "learning_rate": 7.427031231206745e-05, "loss": 1.2032, "step": 16624 }, { "epoch": 0.5953766540727344, "grad_norm": 2.2218496799468994, "learning_rate": 7.425910398310566e-05, "loss": 1.3099, "step": 16625 }, { "epoch": 0.5954124662023027, "grad_norm": 2.0501346588134766, "learning_rate": 7.424789600045444e-05, "loss": 1.5333, "step": 16626 }, { "epoch": 0.595448278331871, "grad_norm": 1.7061489820480347, "learning_rate": 7.423668836426458e-05, "loss": 1.6931, "step": 16627 }, { "epoch": 0.5954840904614392, "grad_norm": 2.1097772121429443, "learning_rate": 7.422548107468679e-05, "loss": 1.2726, "step": 16628 }, { "epoch": 0.5955199025910076, "grad_norm": 1.6923251152038574, "learning_rate": 7.421427413187197e-05, "loss": 1.7505, "step": 16629 }, { "epoch": 0.5955557147205759, "grad_norm": 1.5730112791061401, "learning_rate": 7.420306753597076e-05, "loss": 1.4845, "step": 16630 }, { "epoch": 0.5955915268501442, "grad_norm": 1.3994970321655273, "learning_rate": 7.419186128713407e-05, "loss": 1.2497, "step": 16631 }, { "epoch": 0.5956273389797124, "grad_norm": 1.8320046663284302, "learning_rate": 7.418065538551253e-05, "loss": 1.3596, "step": 16632 }, { "epoch": 0.5956631511092807, "grad_norm": 3.0585572719573975, "learning_rate": 7.4169449831257e-05, "loss": 1.7986, "step": 16633 }, { "epoch": 0.595698963238849, "grad_norm": 1.4562921524047852, "learning_rate": 7.415824462451824e-05, "loss": 1.3, "step": 16634 }, { "epoch": 0.5957347753684172, "grad_norm": 1.64504075050354, "learning_rate": 7.41470397654469e-05, "loss": 1.5275, "step": 16635 }, { "epoch": 0.5957705874979856, "grad_norm": 1.7294472455978394, "learning_rate": 7.413583525419385e-05, "loss": 1.3219, "step": 16636 }, { "epoch": 0.5958063996275539, "grad_norm": 1.6882243156433105, "learning_rate": 7.41246310909097e-05, "loss": 1.5301, "step": 16637 }, { "epoch": 0.5958422117571222, "grad_norm": 1.7502877712249756, "learning_rate": 7.411342727574533e-05, "loss": 1.6065, "step": 16638 }, { "epoch": 0.5958780238866904, "grad_norm": 1.149178385734558, "learning_rate": 7.410222380885135e-05, "loss": 1.2456, "step": 16639 }, { "epoch": 0.5959138360162587, "grad_norm": 1.7622125148773193, "learning_rate": 7.409102069037862e-05, "loss": 1.017, "step": 16640 }, { "epoch": 0.595949648145827, "grad_norm": 2.7804136276245117, "learning_rate": 7.407981792047769e-05, "loss": 1.479, "step": 16641 }, { "epoch": 0.5959854602753952, "grad_norm": 1.9207881689071655, "learning_rate": 7.406861549929946e-05, "loss": 1.4618, "step": 16642 }, { "epoch": 0.5960212724049636, "grad_norm": 1.6801304817199707, "learning_rate": 7.405741342699453e-05, "loss": 1.2735, "step": 16643 }, { "epoch": 0.5960570845345319, "grad_norm": 2.0418813228607178, "learning_rate": 7.404621170371362e-05, "loss": 1.4934, "step": 16644 }, { "epoch": 0.5960928966641001, "grad_norm": 1.2861592769622803, "learning_rate": 7.403501032960748e-05, "loss": 1.4293, "step": 16645 }, { "epoch": 0.5961287087936684, "grad_norm": 1.7379697561264038, "learning_rate": 7.402380930482673e-05, "loss": 1.3013, "step": 16646 }, { "epoch": 0.5961645209232367, "grad_norm": 1.4268943071365356, "learning_rate": 7.40126086295222e-05, "loss": 1.5411, "step": 16647 }, { "epoch": 0.596200333052805, "grad_norm": 1.8618836402893066, "learning_rate": 7.400140830384443e-05, "loss": 1.5819, "step": 16648 }, { "epoch": 0.5962361451823732, "grad_norm": 2.0353612899780273, "learning_rate": 7.399020832794424e-05, "loss": 1.4602, "step": 16649 }, { "epoch": 0.5962719573119416, "grad_norm": 1.4291691780090332, "learning_rate": 7.397900870197216e-05, "loss": 1.5755, "step": 16650 }, { "epoch": 0.5963077694415099, "grad_norm": 1.5784722566604614, "learning_rate": 7.396780942607904e-05, "loss": 1.2951, "step": 16651 }, { "epoch": 0.5963435815710781, "grad_norm": 2.4634132385253906, "learning_rate": 7.395661050041545e-05, "loss": 1.3903, "step": 16652 }, { "epoch": 0.5963793937006464, "grad_norm": 1.3411083221435547, "learning_rate": 7.394541192513202e-05, "loss": 1.5628, "step": 16653 }, { "epoch": 0.5964152058302147, "grad_norm": 1.625428557395935, "learning_rate": 7.393421370037952e-05, "loss": 1.4502, "step": 16654 }, { "epoch": 0.596451017959783, "grad_norm": 1.308756947517395, "learning_rate": 7.392301582630852e-05, "loss": 0.9473, "step": 16655 }, { "epoch": 0.5964868300893512, "grad_norm": 1.509726881980896, "learning_rate": 7.391181830306972e-05, "loss": 1.2107, "step": 16656 }, { "epoch": 0.5965226422189196, "grad_norm": 1.3004424571990967, "learning_rate": 7.390062113081373e-05, "loss": 1.6063, "step": 16657 }, { "epoch": 0.5965584543484879, "grad_norm": 1.2375189065933228, "learning_rate": 7.388942430969123e-05, "loss": 1.6043, "step": 16658 }, { "epoch": 0.5965942664780561, "grad_norm": 1.9687907695770264, "learning_rate": 7.387822783985283e-05, "loss": 1.3229, "step": 16659 }, { "epoch": 0.5966300786076244, "grad_norm": 2.021730422973633, "learning_rate": 7.386703172144921e-05, "loss": 1.2441, "step": 16660 }, { "epoch": 0.5966658907371927, "grad_norm": 1.6941550970077515, "learning_rate": 7.385583595463099e-05, "loss": 1.7031, "step": 16661 }, { "epoch": 0.5967017028667609, "grad_norm": 1.5119400024414062, "learning_rate": 7.384464053954872e-05, "loss": 1.5488, "step": 16662 }, { "epoch": 0.5967375149963292, "grad_norm": 1.5219899415969849, "learning_rate": 7.383344547635311e-05, "loss": 1.378, "step": 16663 }, { "epoch": 0.5967733271258976, "grad_norm": 1.6702772378921509, "learning_rate": 7.382225076519471e-05, "loss": 1.1648, "step": 16664 }, { "epoch": 0.5968091392554659, "grad_norm": 1.8304907083511353, "learning_rate": 7.381105640622419e-05, "loss": 1.3808, "step": 16665 }, { "epoch": 0.5968449513850341, "grad_norm": 1.6182093620300293, "learning_rate": 7.379986239959209e-05, "loss": 1.4653, "step": 16666 }, { "epoch": 0.5968807635146024, "grad_norm": 1.697573184967041, "learning_rate": 7.378866874544908e-05, "loss": 1.0452, "step": 16667 }, { "epoch": 0.5969165756441707, "grad_norm": 1.5860910415649414, "learning_rate": 7.377747544394568e-05, "loss": 1.2655, "step": 16668 }, { "epoch": 0.5969523877737389, "grad_norm": 1.4699441194534302, "learning_rate": 7.376628249523257e-05, "loss": 1.4476, "step": 16669 }, { "epoch": 0.5969881999033072, "grad_norm": 1.399438738822937, "learning_rate": 7.375508989946027e-05, "loss": 1.5486, "step": 16670 }, { "epoch": 0.5970240120328756, "grad_norm": 1.9118127822875977, "learning_rate": 7.374389765677938e-05, "loss": 1.3724, "step": 16671 }, { "epoch": 0.5970598241624439, "grad_norm": 1.6800737380981445, "learning_rate": 7.373270576734048e-05, "loss": 1.6724, "step": 16672 }, { "epoch": 0.5970956362920121, "grad_norm": 1.4176125526428223, "learning_rate": 7.372151423129414e-05, "loss": 1.4472, "step": 16673 }, { "epoch": 0.5971314484215804, "grad_norm": 1.7875847816467285, "learning_rate": 7.371032304879094e-05, "loss": 1.4848, "step": 16674 }, { "epoch": 0.5971672605511487, "grad_norm": 1.5433893203735352, "learning_rate": 7.369913221998141e-05, "loss": 1.4387, "step": 16675 }, { "epoch": 0.5972030726807169, "grad_norm": 2.274228572845459, "learning_rate": 7.368794174501615e-05, "loss": 1.5471, "step": 16676 }, { "epoch": 0.5972388848102852, "grad_norm": 2.2298521995544434, "learning_rate": 7.367675162404567e-05, "loss": 1.6403, "step": 16677 }, { "epoch": 0.5972746969398536, "grad_norm": 1.9916534423828125, "learning_rate": 7.366556185722056e-05, "loss": 1.4881, "step": 16678 }, { "epoch": 0.5973105090694218, "grad_norm": 2.0193593502044678, "learning_rate": 7.365437244469135e-05, "loss": 1.4196, "step": 16679 }, { "epoch": 0.5973463211989901, "grad_norm": 1.4831794500350952, "learning_rate": 7.364318338660858e-05, "loss": 1.7932, "step": 16680 }, { "epoch": 0.5973821333285584, "grad_norm": 1.533564567565918, "learning_rate": 7.363199468312277e-05, "loss": 1.289, "step": 16681 }, { "epoch": 0.5974179454581267, "grad_norm": 2.7487010955810547, "learning_rate": 7.362080633438445e-05, "loss": 1.6209, "step": 16682 }, { "epoch": 0.5974537575876949, "grad_norm": 1.5415210723876953, "learning_rate": 7.360961834054418e-05, "loss": 1.7845, "step": 16683 }, { "epoch": 0.5974895697172632, "grad_norm": 1.641719937324524, "learning_rate": 7.359843070175242e-05, "loss": 1.4157, "step": 16684 }, { "epoch": 0.5975253818468316, "grad_norm": 1.4241138696670532, "learning_rate": 7.358724341815975e-05, "loss": 1.3276, "step": 16685 }, { "epoch": 0.5975611939763998, "grad_norm": 2.071657419204712, "learning_rate": 7.357605648991661e-05, "loss": 1.293, "step": 16686 }, { "epoch": 0.5975970061059681, "grad_norm": 1.8686021566390991, "learning_rate": 7.356486991717359e-05, "loss": 1.3658, "step": 16687 }, { "epoch": 0.5976328182355364, "grad_norm": 2.0502803325653076, "learning_rate": 7.355368370008113e-05, "loss": 1.5443, "step": 16688 }, { "epoch": 0.5976686303651046, "grad_norm": 1.549391508102417, "learning_rate": 7.354249783878973e-05, "loss": 1.4303, "step": 16689 }, { "epoch": 0.5977044424946729, "grad_norm": 1.3476611375808716, "learning_rate": 7.353131233344991e-05, "loss": 1.569, "step": 16690 }, { "epoch": 0.5977402546242412, "grad_norm": 1.3979164361953735, "learning_rate": 7.352012718421212e-05, "loss": 1.2161, "step": 16691 }, { "epoch": 0.5977760667538096, "grad_norm": 2.414729595184326, "learning_rate": 7.350894239122689e-05, "loss": 1.8839, "step": 16692 }, { "epoch": 0.5978118788833778, "grad_norm": 1.8490568399429321, "learning_rate": 7.349775795464466e-05, "loss": 1.3075, "step": 16693 }, { "epoch": 0.5978476910129461, "grad_norm": 1.7273000478744507, "learning_rate": 7.348657387461591e-05, "loss": 1.4859, "step": 16694 }, { "epoch": 0.5978835031425144, "grad_norm": 1.6168313026428223, "learning_rate": 7.34753901512911e-05, "loss": 1.6608, "step": 16695 }, { "epoch": 0.5979193152720826, "grad_norm": 1.6971771717071533, "learning_rate": 7.346420678482071e-05, "loss": 1.2613, "step": 16696 }, { "epoch": 0.5979551274016509, "grad_norm": 1.7362477779388428, "learning_rate": 7.345302377535521e-05, "loss": 1.3894, "step": 16697 }, { "epoch": 0.5979909395312192, "grad_norm": 2.0205066204071045, "learning_rate": 7.3441841123045e-05, "loss": 1.6138, "step": 16698 }, { "epoch": 0.5980267516607876, "grad_norm": 2.0709989070892334, "learning_rate": 7.343065882804056e-05, "loss": 1.0796, "step": 16699 }, { "epoch": 0.5980625637903558, "grad_norm": 1.900892734527588, "learning_rate": 7.341947689049233e-05, "loss": 1.3942, "step": 16700 }, { "epoch": 0.5980983759199241, "grad_norm": 1.6259151697158813, "learning_rate": 7.340829531055078e-05, "loss": 1.0872, "step": 16701 }, { "epoch": 0.5981341880494924, "grad_norm": 2.2745935916900635, "learning_rate": 7.339711408836629e-05, "loss": 1.7529, "step": 16702 }, { "epoch": 0.5981700001790606, "grad_norm": 1.6044317483901978, "learning_rate": 7.338593322408933e-05, "loss": 1.6281, "step": 16703 }, { "epoch": 0.5982058123086289, "grad_norm": 2.226635217666626, "learning_rate": 7.33747527178703e-05, "loss": 1.595, "step": 16704 }, { "epoch": 0.5982416244381972, "grad_norm": 2.034383535385132, "learning_rate": 7.336357256985964e-05, "loss": 1.4936, "step": 16705 }, { "epoch": 0.5982774365677656, "grad_norm": 1.4976028203964233, "learning_rate": 7.335239278020776e-05, "loss": 1.532, "step": 16706 }, { "epoch": 0.5983132486973338, "grad_norm": 1.4269556999206543, "learning_rate": 7.334121334906503e-05, "loss": 1.4254, "step": 16707 }, { "epoch": 0.5983490608269021, "grad_norm": 1.4683499336242676, "learning_rate": 7.333003427658192e-05, "loss": 1.6565, "step": 16708 }, { "epoch": 0.5983848729564704, "grad_norm": 2.059978485107422, "learning_rate": 7.331885556290876e-05, "loss": 1.6096, "step": 16709 }, { "epoch": 0.5984206850860386, "grad_norm": 1.7053067684173584, "learning_rate": 7.330767720819601e-05, "loss": 1.6937, "step": 16710 }, { "epoch": 0.5984564972156069, "grad_norm": 2.052213191986084, "learning_rate": 7.329649921259402e-05, "loss": 1.4403, "step": 16711 }, { "epoch": 0.5984923093451752, "grad_norm": 1.58275306224823, "learning_rate": 7.32853215762532e-05, "loss": 1.3232, "step": 16712 }, { "epoch": 0.5985281214747435, "grad_norm": 1.7357984781265259, "learning_rate": 7.32741442993239e-05, "loss": 1.4713, "step": 16713 }, { "epoch": 0.5985639336043118, "grad_norm": 1.503201961517334, "learning_rate": 7.326296738195654e-05, "loss": 1.5623, "step": 16714 }, { "epoch": 0.5985997457338801, "grad_norm": 1.5212639570236206, "learning_rate": 7.325179082430148e-05, "loss": 1.3255, "step": 16715 }, { "epoch": 0.5986355578634484, "grad_norm": 1.991188645362854, "learning_rate": 7.324061462650901e-05, "loss": 1.4, "step": 16716 }, { "epoch": 0.5986713699930166, "grad_norm": 2.1019017696380615, "learning_rate": 7.32294387887296e-05, "loss": 1.3448, "step": 16717 }, { "epoch": 0.5987071821225849, "grad_norm": 1.5686511993408203, "learning_rate": 7.321826331111353e-05, "loss": 1.709, "step": 16718 }, { "epoch": 0.5987429942521532, "grad_norm": 1.8359214067459106, "learning_rate": 7.320708819381121e-05, "loss": 1.534, "step": 16719 }, { "epoch": 0.5987788063817215, "grad_norm": 1.9091947078704834, "learning_rate": 7.319591343697293e-05, "loss": 1.4201, "step": 16720 }, { "epoch": 0.5988146185112898, "grad_norm": 1.5145343542099, "learning_rate": 7.31847390407491e-05, "loss": 1.4507, "step": 16721 }, { "epoch": 0.5988504306408581, "grad_norm": 1.864920973777771, "learning_rate": 7.317356500528996e-05, "loss": 1.43, "step": 16722 }, { "epoch": 0.5988862427704263, "grad_norm": 1.6559187173843384, "learning_rate": 7.316239133074595e-05, "loss": 1.3573, "step": 16723 }, { "epoch": 0.5989220548999946, "grad_norm": 1.7337490320205688, "learning_rate": 7.315121801726737e-05, "loss": 1.2266, "step": 16724 }, { "epoch": 0.5989578670295629, "grad_norm": 1.527982473373413, "learning_rate": 7.314004506500443e-05, "loss": 1.6621, "step": 16725 }, { "epoch": 0.5989936791591312, "grad_norm": 1.4964022636413574, "learning_rate": 7.312887247410762e-05, "loss": 1.3453, "step": 16726 }, { "epoch": 0.5990294912886995, "grad_norm": 1.426389455795288, "learning_rate": 7.311770024472711e-05, "loss": 1.503, "step": 16727 }, { "epoch": 0.5990653034182678, "grad_norm": 1.8016306161880493, "learning_rate": 7.31065283770133e-05, "loss": 1.5566, "step": 16728 }, { "epoch": 0.5991011155478361, "grad_norm": 1.6403863430023193, "learning_rate": 7.309535687111644e-05, "loss": 1.613, "step": 16729 }, { "epoch": 0.5991369276774043, "grad_norm": 1.8085709810256958, "learning_rate": 7.308418572718687e-05, "loss": 1.7136, "step": 16730 }, { "epoch": 0.5991727398069726, "grad_norm": 1.6913074254989624, "learning_rate": 7.307301494537489e-05, "loss": 1.2859, "step": 16731 }, { "epoch": 0.5992085519365409, "grad_norm": 1.4169158935546875, "learning_rate": 7.306184452583067e-05, "loss": 1.6721, "step": 16732 }, { "epoch": 0.5992443640661091, "grad_norm": 1.5620194673538208, "learning_rate": 7.305067446870468e-05, "loss": 1.4362, "step": 16733 }, { "epoch": 0.5992801761956775, "grad_norm": 1.7782098054885864, "learning_rate": 7.303950477414703e-05, "loss": 1.5194, "step": 16734 }, { "epoch": 0.5993159883252458, "grad_norm": 1.9278711080551147, "learning_rate": 7.302833544230812e-05, "loss": 1.2504, "step": 16735 }, { "epoch": 0.5993518004548141, "grad_norm": 1.8028457164764404, "learning_rate": 7.301716647333812e-05, "loss": 1.0734, "step": 16736 }, { "epoch": 0.5993876125843823, "grad_norm": 1.6356738805770874, "learning_rate": 7.300599786738739e-05, "loss": 1.6322, "step": 16737 }, { "epoch": 0.5994234247139506, "grad_norm": 1.7443691492080688, "learning_rate": 7.299482962460607e-05, "loss": 1.4234, "step": 16738 }, { "epoch": 0.5994592368435189, "grad_norm": 1.5225340127944946, "learning_rate": 7.298366174514456e-05, "loss": 1.5641, "step": 16739 }, { "epoch": 0.5994950489730871, "grad_norm": 1.35145103931427, "learning_rate": 7.297249422915301e-05, "loss": 1.4949, "step": 16740 }, { "epoch": 0.5995308611026555, "grad_norm": 2.0662713050842285, "learning_rate": 7.296132707678166e-05, "loss": 1.4635, "step": 16741 }, { "epoch": 0.5995666732322238, "grad_norm": 1.6183621883392334, "learning_rate": 7.29501602881808e-05, "loss": 1.6438, "step": 16742 }, { "epoch": 0.5996024853617921, "grad_norm": 2.1140263080596924, "learning_rate": 7.29389938635006e-05, "loss": 1.778, "step": 16743 }, { "epoch": 0.5996382974913603, "grad_norm": 1.8030223846435547, "learning_rate": 7.292782780289141e-05, "loss": 1.6374, "step": 16744 }, { "epoch": 0.5996741096209286, "grad_norm": 1.7146693468093872, "learning_rate": 7.291666210650328e-05, "loss": 1.3417, "step": 16745 }, { "epoch": 0.5997099217504969, "grad_norm": 1.5830776691436768, "learning_rate": 7.290549677448661e-05, "loss": 1.4825, "step": 16746 }, { "epoch": 0.5997457338800651, "grad_norm": 2.0402321815490723, "learning_rate": 7.289433180699148e-05, "loss": 1.2888, "step": 16747 }, { "epoch": 0.5997815460096335, "grad_norm": 1.7229087352752686, "learning_rate": 7.28831672041682e-05, "loss": 1.4803, "step": 16748 }, { "epoch": 0.5998173581392018, "grad_norm": 1.6451315879821777, "learning_rate": 7.287200296616689e-05, "loss": 1.5543, "step": 16749 }, { "epoch": 0.59985317026877, "grad_norm": 2.058530569076538, "learning_rate": 7.286083909313779e-05, "loss": 1.4938, "step": 16750 }, { "epoch": 0.5998889823983383, "grad_norm": 1.7364871501922607, "learning_rate": 7.284967558523112e-05, "loss": 1.5158, "step": 16751 }, { "epoch": 0.5999247945279066, "grad_norm": 1.551857829093933, "learning_rate": 7.2838512442597e-05, "loss": 1.2004, "step": 16752 }, { "epoch": 0.5999606066574749, "grad_norm": 1.3351510763168335, "learning_rate": 7.282734966538569e-05, "loss": 1.6516, "step": 16753 }, { "epoch": 0.5999964187870431, "grad_norm": 1.86201012134552, "learning_rate": 7.281618725374733e-05, "loss": 1.3853, "step": 16754 }, { "epoch": 0.6000322309166115, "grad_norm": 1.8424255847930908, "learning_rate": 7.28050252078321e-05, "loss": 1.5038, "step": 16755 }, { "epoch": 0.6000680430461798, "grad_norm": 1.396933913230896, "learning_rate": 7.279386352779016e-05, "loss": 1.3897, "step": 16756 }, { "epoch": 0.600103855175748, "grad_norm": 1.5650379657745361, "learning_rate": 7.278270221377174e-05, "loss": 1.2216, "step": 16757 }, { "epoch": 0.6001396673053163, "grad_norm": 1.6911360025405884, "learning_rate": 7.277154126592695e-05, "loss": 1.413, "step": 16758 }, { "epoch": 0.6001754794348846, "grad_norm": 1.6357327699661255, "learning_rate": 7.276038068440592e-05, "loss": 1.1571, "step": 16759 }, { "epoch": 0.6002112915644529, "grad_norm": 1.6942464113235474, "learning_rate": 7.274922046935885e-05, "loss": 1.5146, "step": 16760 }, { "epoch": 0.6002471036940211, "grad_norm": 1.8150880336761475, "learning_rate": 7.273806062093585e-05, "loss": 1.4704, "step": 16761 }, { "epoch": 0.6002829158235895, "grad_norm": 1.3599683046340942, "learning_rate": 7.27269011392871e-05, "loss": 1.3613, "step": 16762 }, { "epoch": 0.6003187279531578, "grad_norm": 1.4246058464050293, "learning_rate": 7.271574202456268e-05, "loss": 1.6298, "step": 16763 }, { "epoch": 0.600354540082726, "grad_norm": 1.431622862815857, "learning_rate": 7.27045832769128e-05, "loss": 1.4792, "step": 16764 }, { "epoch": 0.6003903522122943, "grad_norm": 2.5366933345794678, "learning_rate": 7.269342489648752e-05, "loss": 1.4837, "step": 16765 }, { "epoch": 0.6004261643418626, "grad_norm": 1.9744517803192139, "learning_rate": 7.268226688343699e-05, "loss": 1.2326, "step": 16766 }, { "epoch": 0.6004619764714308, "grad_norm": 1.7462183237075806, "learning_rate": 7.267110923791133e-05, "loss": 1.5414, "step": 16767 }, { "epoch": 0.6004977886009991, "grad_norm": 1.5139094591140747, "learning_rate": 7.265995196006062e-05, "loss": 1.6375, "step": 16768 }, { "epoch": 0.6005336007305675, "grad_norm": 1.9999083280563354, "learning_rate": 7.264879505003502e-05, "loss": 1.3057, "step": 16769 }, { "epoch": 0.6005694128601358, "grad_norm": 1.6535240411758423, "learning_rate": 7.263763850798458e-05, "loss": 1.7807, "step": 16770 }, { "epoch": 0.600605224989704, "grad_norm": 1.6899582147598267, "learning_rate": 7.262648233405942e-05, "loss": 1.2351, "step": 16771 }, { "epoch": 0.6006410371192723, "grad_norm": 1.7519525289535522, "learning_rate": 7.261532652840964e-05, "loss": 1.4308, "step": 16772 }, { "epoch": 0.6006768492488406, "grad_norm": 1.5700947046279907, "learning_rate": 7.260417109118531e-05, "loss": 1.3495, "step": 16773 }, { "epoch": 0.6007126613784088, "grad_norm": 1.5926108360290527, "learning_rate": 7.259301602253652e-05, "loss": 1.6475, "step": 16774 }, { "epoch": 0.6007484735079771, "grad_norm": 1.5498440265655518, "learning_rate": 7.258186132261336e-05, "loss": 1.6338, "step": 16775 }, { "epoch": 0.6007842856375455, "grad_norm": 1.9977786540985107, "learning_rate": 7.25707069915659e-05, "loss": 1.3661, "step": 16776 }, { "epoch": 0.6008200977671138, "grad_norm": 1.361795425415039, "learning_rate": 7.255955302954416e-05, "loss": 1.2156, "step": 16777 }, { "epoch": 0.600855909896682, "grad_norm": 1.5042104721069336, "learning_rate": 7.254839943669826e-05, "loss": 1.3412, "step": 16778 }, { "epoch": 0.6008917220262503, "grad_norm": 1.656468391418457, "learning_rate": 7.253724621317822e-05, "loss": 1.5451, "step": 16779 }, { "epoch": 0.6009275341558186, "grad_norm": 1.2738529443740845, "learning_rate": 7.252609335913413e-05, "loss": 1.284, "step": 16780 }, { "epoch": 0.6009633462853868, "grad_norm": 1.751894235610962, "learning_rate": 7.251494087471599e-05, "loss": 1.5876, "step": 16781 }, { "epoch": 0.6009991584149551, "grad_norm": 1.967529058456421, "learning_rate": 7.250378876007389e-05, "loss": 1.2286, "step": 16782 }, { "epoch": 0.6010349705445235, "grad_norm": 1.7119519710540771, "learning_rate": 7.249263701535782e-05, "loss": 1.6704, "step": 16783 }, { "epoch": 0.6010707826740918, "grad_norm": 2.0444655418395996, "learning_rate": 7.248148564071787e-05, "loss": 1.3567, "step": 16784 }, { "epoch": 0.60110659480366, "grad_norm": 2.1970937252044678, "learning_rate": 7.247033463630402e-05, "loss": 1.665, "step": 16785 }, { "epoch": 0.6011424069332283, "grad_norm": 1.688825249671936, "learning_rate": 7.24591840022663e-05, "loss": 1.3525, "step": 16786 }, { "epoch": 0.6011782190627966, "grad_norm": 1.3591644763946533, "learning_rate": 7.244803373875475e-05, "loss": 1.4548, "step": 16787 }, { "epoch": 0.6012140311923648, "grad_norm": 2.0499813556671143, "learning_rate": 7.243688384591934e-05, "loss": 1.4286, "step": 16788 }, { "epoch": 0.6012498433219331, "grad_norm": 1.7680226564407349, "learning_rate": 7.242573432391012e-05, "loss": 1.5824, "step": 16789 }, { "epoch": 0.6012856554515015, "grad_norm": 1.749014139175415, "learning_rate": 7.241458517287708e-05, "loss": 1.6337, "step": 16790 }, { "epoch": 0.6013214675810697, "grad_norm": 1.4293020963668823, "learning_rate": 7.24034363929702e-05, "loss": 1.6886, "step": 16791 }, { "epoch": 0.601357279710638, "grad_norm": 1.5241018533706665, "learning_rate": 7.23922879843395e-05, "loss": 1.5131, "step": 16792 }, { "epoch": 0.6013930918402063, "grad_norm": 2.091395854949951, "learning_rate": 7.238113994713495e-05, "loss": 1.5334, "step": 16793 }, { "epoch": 0.6014289039697746, "grad_norm": 1.7409805059432983, "learning_rate": 7.236999228150654e-05, "loss": 1.5341, "step": 16794 }, { "epoch": 0.6014647160993428, "grad_norm": 1.6296451091766357, "learning_rate": 7.235884498760423e-05, "loss": 1.4788, "step": 16795 }, { "epoch": 0.6015005282289111, "grad_norm": 1.537999153137207, "learning_rate": 7.234769806557802e-05, "loss": 1.5781, "step": 16796 }, { "epoch": 0.6015363403584795, "grad_norm": 1.5315535068511963, "learning_rate": 7.233655151557786e-05, "loss": 1.5663, "step": 16797 }, { "epoch": 0.6015721524880477, "grad_norm": 1.7069329023361206, "learning_rate": 7.232540533775371e-05, "loss": 1.4498, "step": 16798 }, { "epoch": 0.601607964617616, "grad_norm": 1.808366298675537, "learning_rate": 7.231425953225552e-05, "loss": 1.7831, "step": 16799 }, { "epoch": 0.6016437767471843, "grad_norm": 1.2750335931777954, "learning_rate": 7.230311409923329e-05, "loss": 1.6704, "step": 16800 }, { "epoch": 0.6016795888767525, "grad_norm": 2.135979652404785, "learning_rate": 7.22919690388369e-05, "loss": 1.5089, "step": 16801 }, { "epoch": 0.6017154010063208, "grad_norm": 1.8281000852584839, "learning_rate": 7.228082435121636e-05, "loss": 1.7816, "step": 16802 }, { "epoch": 0.6017512131358891, "grad_norm": 1.9501540660858154, "learning_rate": 7.226968003652157e-05, "loss": 1.4132, "step": 16803 }, { "epoch": 0.6017870252654575, "grad_norm": 2.566105604171753, "learning_rate": 7.225853609490244e-05, "loss": 1.3759, "step": 16804 }, { "epoch": 0.6018228373950257, "grad_norm": 1.4011183977127075, "learning_rate": 7.224739252650894e-05, "loss": 1.3426, "step": 16805 }, { "epoch": 0.601858649524594, "grad_norm": 1.7511323690414429, "learning_rate": 7.223624933149095e-05, "loss": 1.3261, "step": 16806 }, { "epoch": 0.6018944616541623, "grad_norm": 2.664832830429077, "learning_rate": 7.222510650999845e-05, "loss": 1.573, "step": 16807 }, { "epoch": 0.6019302737837305, "grad_norm": 1.4400556087493896, "learning_rate": 7.221396406218129e-05, "loss": 1.2956, "step": 16808 }, { "epoch": 0.6019660859132988, "grad_norm": 1.4470281600952148, "learning_rate": 7.220282198818941e-05, "loss": 1.5883, "step": 16809 }, { "epoch": 0.6020018980428671, "grad_norm": 1.4601818323135376, "learning_rate": 7.21916802881727e-05, "loss": 1.4075, "step": 16810 }, { "epoch": 0.6020377101724355, "grad_norm": 1.3011351823806763, "learning_rate": 7.218053896228107e-05, "loss": 1.2083, "step": 16811 }, { "epoch": 0.6020735223020037, "grad_norm": 2.1279044151306152, "learning_rate": 7.216939801066444e-05, "loss": 1.5229, "step": 16812 }, { "epoch": 0.602109334431572, "grad_norm": 1.5820890665054321, "learning_rate": 7.215825743347259e-05, "loss": 1.2355, "step": 16813 }, { "epoch": 0.6021451465611403, "grad_norm": 1.6486124992370605, "learning_rate": 7.214711723085553e-05, "loss": 1.3337, "step": 16814 }, { "epoch": 0.6021809586907085, "grad_norm": 1.8116123676300049, "learning_rate": 7.213597740296304e-05, "loss": 1.3902, "step": 16815 }, { "epoch": 0.6022167708202768, "grad_norm": 1.5074397325515747, "learning_rate": 7.212483794994503e-05, "loss": 1.1192, "step": 16816 }, { "epoch": 0.6022525829498451, "grad_norm": 1.316300868988037, "learning_rate": 7.211369887195139e-05, "loss": 1.3241, "step": 16817 }, { "epoch": 0.6022883950794135, "grad_norm": 2.0315091609954834, "learning_rate": 7.210256016913195e-05, "loss": 1.5004, "step": 16818 }, { "epoch": 0.6023242072089817, "grad_norm": 1.8143523931503296, "learning_rate": 7.209142184163657e-05, "loss": 1.6522, "step": 16819 }, { "epoch": 0.60236001933855, "grad_norm": 1.3392434120178223, "learning_rate": 7.208028388961515e-05, "loss": 1.2916, "step": 16820 }, { "epoch": 0.6023958314681183, "grad_norm": 1.4817171096801758, "learning_rate": 7.206914631321749e-05, "loss": 1.3744, "step": 16821 }, { "epoch": 0.6024316435976865, "grad_norm": 1.1809767484664917, "learning_rate": 7.205800911259338e-05, "loss": 1.3433, "step": 16822 }, { "epoch": 0.6024674557272548, "grad_norm": 1.8400154113769531, "learning_rate": 7.204687228789279e-05, "loss": 1.5053, "step": 16823 }, { "epoch": 0.6025032678568231, "grad_norm": 1.5287377834320068, "learning_rate": 7.20357358392654e-05, "loss": 1.6242, "step": 16824 }, { "epoch": 0.6025390799863914, "grad_norm": 1.6333818435668945, "learning_rate": 7.202459976686118e-05, "loss": 1.249, "step": 16825 }, { "epoch": 0.6025748921159597, "grad_norm": 1.624626636505127, "learning_rate": 7.201346407082982e-05, "loss": 1.3627, "step": 16826 }, { "epoch": 0.602610704245528, "grad_norm": 1.5464283227920532, "learning_rate": 7.200232875132127e-05, "loss": 1.652, "step": 16827 }, { "epoch": 0.6026465163750963, "grad_norm": 1.678821325302124, "learning_rate": 7.199119380848525e-05, "loss": 1.4797, "step": 16828 }, { "epoch": 0.6026823285046645, "grad_norm": 1.8502992391586304, "learning_rate": 7.198005924247155e-05, "loss": 1.7994, "step": 16829 }, { "epoch": 0.6027181406342328, "grad_norm": 1.5439445972442627, "learning_rate": 7.196892505343007e-05, "loss": 1.1767, "step": 16830 }, { "epoch": 0.6027539527638011, "grad_norm": 1.6171842813491821, "learning_rate": 7.195779124151048e-05, "loss": 1.3557, "step": 16831 }, { "epoch": 0.6027897648933694, "grad_norm": 2.662693500518799, "learning_rate": 7.19466578068627e-05, "loss": 1.3941, "step": 16832 }, { "epoch": 0.6028255770229377, "grad_norm": 1.5511195659637451, "learning_rate": 7.193552474963638e-05, "loss": 1.5937, "step": 16833 }, { "epoch": 0.602861389152506, "grad_norm": 1.55390202999115, "learning_rate": 7.192439206998146e-05, "loss": 1.432, "step": 16834 }, { "epoch": 0.6028972012820742, "grad_norm": 1.8486980199813843, "learning_rate": 7.191325976804754e-05, "loss": 1.3208, "step": 16835 }, { "epoch": 0.6029330134116425, "grad_norm": 2.2025701999664307, "learning_rate": 7.190212784398458e-05, "loss": 1.6723, "step": 16836 }, { "epoch": 0.6029688255412108, "grad_norm": 1.3402276039123535, "learning_rate": 7.18909962979422e-05, "loss": 1.279, "step": 16837 }, { "epoch": 0.603004637670779, "grad_norm": 1.858518362045288, "learning_rate": 7.187986513007018e-05, "loss": 1.645, "step": 16838 }, { "epoch": 0.6030404498003473, "grad_norm": 1.7115235328674316, "learning_rate": 7.186873434051832e-05, "loss": 1.5733, "step": 16839 }, { "epoch": 0.6030762619299157, "grad_norm": 1.9213659763336182, "learning_rate": 7.185760392943637e-05, "loss": 1.2916, "step": 16840 }, { "epoch": 0.603112074059484, "grad_norm": 1.2515188455581665, "learning_rate": 7.184647389697405e-05, "loss": 1.5181, "step": 16841 }, { "epoch": 0.6031478861890522, "grad_norm": 1.9560471773147583, "learning_rate": 7.183534424328106e-05, "loss": 1.5256, "step": 16842 }, { "epoch": 0.6031836983186205, "grad_norm": 1.352102518081665, "learning_rate": 7.182421496850726e-05, "loss": 1.4133, "step": 16843 }, { "epoch": 0.6032195104481888, "grad_norm": 1.5485777854919434, "learning_rate": 7.181308607280223e-05, "loss": 1.5151, "step": 16844 }, { "epoch": 0.603255322577757, "grad_norm": 1.3369213342666626, "learning_rate": 7.180195755631584e-05, "loss": 1.2579, "step": 16845 }, { "epoch": 0.6032911347073253, "grad_norm": 2.329124689102173, "learning_rate": 7.179082941919773e-05, "loss": 1.3283, "step": 16846 }, { "epoch": 0.6033269468368937, "grad_norm": 1.8115071058273315, "learning_rate": 7.177970166159758e-05, "loss": 1.5473, "step": 16847 }, { "epoch": 0.603362758966462, "grad_norm": 1.4668792486190796, "learning_rate": 7.176857428366517e-05, "loss": 1.108, "step": 16848 }, { "epoch": 0.6033985710960302, "grad_norm": 1.9151508808135986, "learning_rate": 7.175744728555016e-05, "loss": 1.3883, "step": 16849 }, { "epoch": 0.6034343832255985, "grad_norm": 1.7193862199783325, "learning_rate": 7.174632066740227e-05, "loss": 1.9886, "step": 16850 }, { "epoch": 0.6034701953551668, "grad_norm": 1.6349965333938599, "learning_rate": 7.17351944293712e-05, "loss": 1.7273, "step": 16851 }, { "epoch": 0.603506007484735, "grad_norm": 1.7464351654052734, "learning_rate": 7.172406857160662e-05, "loss": 1.6175, "step": 16852 }, { "epoch": 0.6035418196143033, "grad_norm": 1.512633204460144, "learning_rate": 7.171294309425823e-05, "loss": 1.5267, "step": 16853 }, { "epoch": 0.6035776317438717, "grad_norm": 1.607930064201355, "learning_rate": 7.17018179974757e-05, "loss": 1.4871, "step": 16854 }, { "epoch": 0.60361344387344, "grad_norm": 1.959700345993042, "learning_rate": 7.169069328140872e-05, "loss": 1.3557, "step": 16855 }, { "epoch": 0.6036492560030082, "grad_norm": 1.5639288425445557, "learning_rate": 7.167956894620694e-05, "loss": 1.1726, "step": 16856 }, { "epoch": 0.6036850681325765, "grad_norm": 1.6049681901931763, "learning_rate": 7.166844499202002e-05, "loss": 1.658, "step": 16857 }, { "epoch": 0.6037208802621448, "grad_norm": 1.4732575416564941, "learning_rate": 7.165732141899761e-05, "loss": 1.4717, "step": 16858 }, { "epoch": 0.603756692391713, "grad_norm": 2.0398313999176025, "learning_rate": 7.164619822728941e-05, "loss": 1.4927, "step": 16859 }, { "epoch": 0.6037925045212813, "grad_norm": 1.3296444416046143, "learning_rate": 7.163507541704503e-05, "loss": 1.5785, "step": 16860 }, { "epoch": 0.6038283166508497, "grad_norm": 1.3880574703216553, "learning_rate": 7.162395298841414e-05, "loss": 1.5667, "step": 16861 }, { "epoch": 0.603864128780418, "grad_norm": 1.4414122104644775, "learning_rate": 7.161283094154633e-05, "loss": 1.5618, "step": 16862 }, { "epoch": 0.6038999409099862, "grad_norm": 2.2014966011047363, "learning_rate": 7.160170927659128e-05, "loss": 1.4817, "step": 16863 }, { "epoch": 0.6039357530395545, "grad_norm": 2.323162078857422, "learning_rate": 7.159058799369861e-05, "loss": 1.346, "step": 16864 }, { "epoch": 0.6039715651691228, "grad_norm": 1.2874650955200195, "learning_rate": 7.157946709301791e-05, "loss": 0.9955, "step": 16865 }, { "epoch": 0.604007377298691, "grad_norm": 1.3906731605529785, "learning_rate": 7.156834657469885e-05, "loss": 1.5495, "step": 16866 }, { "epoch": 0.6040431894282593, "grad_norm": 1.6526511907577515, "learning_rate": 7.155722643889097e-05, "loss": 1.6115, "step": 16867 }, { "epoch": 0.6040790015578277, "grad_norm": 1.9570540189743042, "learning_rate": 7.154610668574395e-05, "loss": 1.0826, "step": 16868 }, { "epoch": 0.604114813687396, "grad_norm": 1.2908234596252441, "learning_rate": 7.153498731540735e-05, "loss": 1.1568, "step": 16869 }, { "epoch": 0.6041506258169642, "grad_norm": 2.093600273132324, "learning_rate": 7.15238683280308e-05, "loss": 1.6383, "step": 16870 }, { "epoch": 0.6041864379465325, "grad_norm": 1.7811310291290283, "learning_rate": 7.151274972376383e-05, "loss": 1.1835, "step": 16871 }, { "epoch": 0.6042222500761008, "grad_norm": 1.4339679479599, "learning_rate": 7.15016315027561e-05, "loss": 1.1261, "step": 16872 }, { "epoch": 0.604258062205669, "grad_norm": 1.8503432273864746, "learning_rate": 7.149051366515716e-05, "loss": 1.7787, "step": 16873 }, { "epoch": 0.6042938743352373, "grad_norm": 1.6346187591552734, "learning_rate": 7.147939621111655e-05, "loss": 1.658, "step": 16874 }, { "epoch": 0.6043296864648057, "grad_norm": 1.4566487073898315, "learning_rate": 7.146827914078391e-05, "loss": 1.5136, "step": 16875 }, { "epoch": 0.6043654985943739, "grad_norm": 1.6601160764694214, "learning_rate": 7.145716245430876e-05, "loss": 1.5001, "step": 16876 }, { "epoch": 0.6044013107239422, "grad_norm": 1.377268671989441, "learning_rate": 7.144604615184067e-05, "loss": 1.6418, "step": 16877 }, { "epoch": 0.6044371228535105, "grad_norm": 1.7275559902191162, "learning_rate": 7.143493023352918e-05, "loss": 1.5162, "step": 16878 }, { "epoch": 0.6044729349830787, "grad_norm": 1.9887871742248535, "learning_rate": 7.142381469952388e-05, "loss": 1.7494, "step": 16879 }, { "epoch": 0.604508747112647, "grad_norm": 1.5621000528335571, "learning_rate": 7.141269954997428e-05, "loss": 1.5148, "step": 16880 }, { "epoch": 0.6045445592422153, "grad_norm": 1.5300337076187134, "learning_rate": 7.140158478502995e-05, "loss": 1.4512, "step": 16881 }, { "epoch": 0.6045803713717837, "grad_norm": 1.5173213481903076, "learning_rate": 7.13904704048404e-05, "loss": 1.2817, "step": 16882 }, { "epoch": 0.6046161835013519, "grad_norm": 1.6534255743026733, "learning_rate": 7.137935640955516e-05, "loss": 1.5485, "step": 16883 }, { "epoch": 0.6046519956309202, "grad_norm": 2.1657660007476807, "learning_rate": 7.136824279932378e-05, "loss": 1.3278, "step": 16884 }, { "epoch": 0.6046878077604885, "grad_norm": 1.8624151945114136, "learning_rate": 7.135712957429573e-05, "loss": 1.2911, "step": 16885 }, { "epoch": 0.6047236198900567, "grad_norm": 2.1512229442596436, "learning_rate": 7.134601673462058e-05, "loss": 1.5735, "step": 16886 }, { "epoch": 0.604759432019625, "grad_norm": 1.6311861276626587, "learning_rate": 7.133490428044778e-05, "loss": 1.2567, "step": 16887 }, { "epoch": 0.6047952441491933, "grad_norm": 1.3144361972808838, "learning_rate": 7.132379221192691e-05, "loss": 1.1432, "step": 16888 }, { "epoch": 0.6048310562787617, "grad_norm": 2.04892635345459, "learning_rate": 7.131268052920739e-05, "loss": 1.3096, "step": 16889 }, { "epoch": 0.6048668684083299, "grad_norm": 1.3833351135253906, "learning_rate": 7.130156923243879e-05, "loss": 1.5811, "step": 16890 }, { "epoch": 0.6049026805378982, "grad_norm": 1.4970839023590088, "learning_rate": 7.129045832177054e-05, "loss": 1.2673, "step": 16891 }, { "epoch": 0.6049384926674665, "grad_norm": 1.5738197565078735, "learning_rate": 7.127934779735212e-05, "loss": 1.552, "step": 16892 }, { "epoch": 0.6049743047970347, "grad_norm": 1.8134737014770508, "learning_rate": 7.126823765933306e-05, "loss": 1.4541, "step": 16893 }, { "epoch": 0.605010116926603, "grad_norm": 1.7841168642044067, "learning_rate": 7.125712790786277e-05, "loss": 1.3122, "step": 16894 }, { "epoch": 0.6050459290561713, "grad_norm": 1.7300609350204468, "learning_rate": 7.124601854309077e-05, "loss": 1.6758, "step": 16895 }, { "epoch": 0.6050817411857397, "grad_norm": 1.904471516609192, "learning_rate": 7.123490956516649e-05, "loss": 1.5818, "step": 16896 }, { "epoch": 0.6051175533153079, "grad_norm": 1.6749107837677002, "learning_rate": 7.122380097423941e-05, "loss": 1.8399, "step": 16897 }, { "epoch": 0.6051533654448762, "grad_norm": 1.8210564851760864, "learning_rate": 7.121269277045894e-05, "loss": 1.3766, "step": 16898 }, { "epoch": 0.6051891775744445, "grad_norm": 1.5455830097198486, "learning_rate": 7.120158495397459e-05, "loss": 1.7764, "step": 16899 }, { "epoch": 0.6052249897040127, "grad_norm": 1.4964015483856201, "learning_rate": 7.119047752493576e-05, "loss": 1.3216, "step": 16900 }, { "epoch": 0.605260801833581, "grad_norm": 1.3908185958862305, "learning_rate": 7.117937048349188e-05, "loss": 1.5999, "step": 16901 }, { "epoch": 0.6052966139631493, "grad_norm": 1.8004734516143799, "learning_rate": 7.11682638297924e-05, "loss": 1.818, "step": 16902 }, { "epoch": 0.6053324260927176, "grad_norm": 1.4848984479904175, "learning_rate": 7.115715756398674e-05, "loss": 1.5299, "step": 16903 }, { "epoch": 0.6053682382222859, "grad_norm": 1.5885624885559082, "learning_rate": 7.114605168622432e-05, "loss": 1.8183, "step": 16904 }, { "epoch": 0.6054040503518542, "grad_norm": 1.8533222675323486, "learning_rate": 7.113494619665456e-05, "loss": 1.5371, "step": 16905 }, { "epoch": 0.6054398624814225, "grad_norm": 2.0348994731903076, "learning_rate": 7.112384109542687e-05, "loss": 1.1959, "step": 16906 }, { "epoch": 0.6054756746109907, "grad_norm": 1.8801383972167969, "learning_rate": 7.111273638269063e-05, "loss": 1.6672, "step": 16907 }, { "epoch": 0.605511486740559, "grad_norm": 2.084892749786377, "learning_rate": 7.110163205859528e-05, "loss": 1.6724, "step": 16908 }, { "epoch": 0.6055472988701273, "grad_norm": 1.7891219854354858, "learning_rate": 7.109052812329023e-05, "loss": 1.126, "step": 16909 }, { "epoch": 0.6055831109996956, "grad_norm": 2.057370901107788, "learning_rate": 7.107942457692475e-05, "loss": 1.6619, "step": 16910 }, { "epoch": 0.6056189231292639, "grad_norm": 1.486271858215332, "learning_rate": 7.106832141964839e-05, "loss": 1.3337, "step": 16911 }, { "epoch": 0.6056547352588322, "grad_norm": 1.9159584045410156, "learning_rate": 7.105721865161037e-05, "loss": 1.5583, "step": 16912 }, { "epoch": 0.6056905473884004, "grad_norm": 1.8747551441192627, "learning_rate": 7.104611627296018e-05, "loss": 1.3585, "step": 16913 }, { "epoch": 0.6057263595179687, "grad_norm": 1.439809799194336, "learning_rate": 7.103501428384714e-05, "loss": 1.4312, "step": 16914 }, { "epoch": 0.605762171647537, "grad_norm": 1.6186916828155518, "learning_rate": 7.102391268442062e-05, "loss": 1.4321, "step": 16915 }, { "epoch": 0.6057979837771053, "grad_norm": 1.4521013498306274, "learning_rate": 7.101281147482996e-05, "loss": 1.4515, "step": 16916 }, { "epoch": 0.6058337959066736, "grad_norm": 1.5437028408050537, "learning_rate": 7.100171065522457e-05, "loss": 1.3871, "step": 16917 }, { "epoch": 0.6058696080362419, "grad_norm": 1.9497326612472534, "learning_rate": 7.099061022575377e-05, "loss": 1.6164, "step": 16918 }, { "epoch": 0.6059054201658102, "grad_norm": 1.3493622541427612, "learning_rate": 7.097951018656683e-05, "loss": 1.4338, "step": 16919 }, { "epoch": 0.6059412322953784, "grad_norm": 1.541733741760254, "learning_rate": 7.09684105378132e-05, "loss": 1.3511, "step": 16920 }, { "epoch": 0.6059770444249467, "grad_norm": 1.4317058324813843, "learning_rate": 7.095731127964211e-05, "loss": 1.3761, "step": 16921 }, { "epoch": 0.606012856554515, "grad_norm": 1.4599525928497314, "learning_rate": 7.0946212412203e-05, "loss": 1.8323, "step": 16922 }, { "epoch": 0.6060486686840832, "grad_norm": 1.6733118295669556, "learning_rate": 7.093511393564504e-05, "loss": 1.6659, "step": 16923 }, { "epoch": 0.6060844808136516, "grad_norm": 1.553957462310791, "learning_rate": 7.092401585011771e-05, "loss": 1.3088, "step": 16924 }, { "epoch": 0.6061202929432199, "grad_norm": 1.6866424083709717, "learning_rate": 7.091291815577022e-05, "loss": 1.6451, "step": 16925 }, { "epoch": 0.6061561050727882, "grad_norm": 1.5042775869369507, "learning_rate": 7.090182085275185e-05, "loss": 1.5038, "step": 16926 }, { "epoch": 0.6061919172023564, "grad_norm": 1.7671167850494385, "learning_rate": 7.089072394121201e-05, "loss": 1.4282, "step": 16927 }, { "epoch": 0.6062277293319247, "grad_norm": 1.7589083909988403, "learning_rate": 7.087962742129988e-05, "loss": 1.4675, "step": 16928 }, { "epoch": 0.606263541461493, "grad_norm": 1.4512827396392822, "learning_rate": 7.086853129316484e-05, "loss": 1.5013, "step": 16929 }, { "epoch": 0.6062993535910612, "grad_norm": 1.6962764263153076, "learning_rate": 7.085743555695609e-05, "loss": 1.1876, "step": 16930 }, { "epoch": 0.6063351657206296, "grad_norm": 2.0834460258483887, "learning_rate": 7.084634021282301e-05, "loss": 1.6448, "step": 16931 }, { "epoch": 0.6063709778501979, "grad_norm": 1.53248131275177, "learning_rate": 7.083524526091475e-05, "loss": 1.4959, "step": 16932 }, { "epoch": 0.6064067899797662, "grad_norm": 1.4516412019729614, "learning_rate": 7.082415070138071e-05, "loss": 1.6379, "step": 16933 }, { "epoch": 0.6064426021093344, "grad_norm": 1.47740638256073, "learning_rate": 7.081305653437007e-05, "loss": 1.4495, "step": 16934 }, { "epoch": 0.6064784142389027, "grad_norm": 1.6300405263900757, "learning_rate": 7.080196276003209e-05, "loss": 1.4695, "step": 16935 }, { "epoch": 0.606514226368471, "grad_norm": 2.4109437465667725, "learning_rate": 7.079086937851604e-05, "loss": 1.3669, "step": 16936 }, { "epoch": 0.6065500384980392, "grad_norm": 1.4852944612503052, "learning_rate": 7.077977638997117e-05, "loss": 1.5768, "step": 16937 }, { "epoch": 0.6065858506276076, "grad_norm": 2.6029889583587646, "learning_rate": 7.076868379454673e-05, "loss": 1.6087, "step": 16938 }, { "epoch": 0.6066216627571759, "grad_norm": 1.5269404649734497, "learning_rate": 7.07575915923919e-05, "loss": 1.405, "step": 16939 }, { "epoch": 0.6066574748867442, "grad_norm": 1.8272013664245605, "learning_rate": 7.074649978365602e-05, "loss": 1.2481, "step": 16940 }, { "epoch": 0.6066932870163124, "grad_norm": 1.7521268129348755, "learning_rate": 7.073540836848817e-05, "loss": 1.5097, "step": 16941 }, { "epoch": 0.6067290991458807, "grad_norm": 2.199578046798706, "learning_rate": 7.072431734703772e-05, "loss": 1.7481, "step": 16942 }, { "epoch": 0.606764911275449, "grad_norm": 1.9226924180984497, "learning_rate": 7.071322671945382e-05, "loss": 1.5111, "step": 16943 }, { "epoch": 0.6068007234050172, "grad_norm": 1.5912048816680908, "learning_rate": 7.070213648588564e-05, "loss": 1.5916, "step": 16944 }, { "epoch": 0.6068365355345856, "grad_norm": 1.532690167427063, "learning_rate": 7.069104664648244e-05, "loss": 1.4422, "step": 16945 }, { "epoch": 0.6068723476641539, "grad_norm": 2.0913636684417725, "learning_rate": 7.06799572013934e-05, "loss": 1.6614, "step": 16946 }, { "epoch": 0.6069081597937221, "grad_norm": 1.6115814447402954, "learning_rate": 7.066886815076771e-05, "loss": 1.3298, "step": 16947 }, { "epoch": 0.6069439719232904, "grad_norm": 1.4363234043121338, "learning_rate": 7.065777949475456e-05, "loss": 1.3889, "step": 16948 }, { "epoch": 0.6069797840528587, "grad_norm": 1.3749061822891235, "learning_rate": 7.064669123350316e-05, "loss": 1.3767, "step": 16949 }, { "epoch": 0.607015596182427, "grad_norm": 1.6005500555038452, "learning_rate": 7.063560336716263e-05, "loss": 1.4588, "step": 16950 }, { "epoch": 0.6070514083119952, "grad_norm": 1.9682674407958984, "learning_rate": 7.062451589588221e-05, "loss": 1.2503, "step": 16951 }, { "epoch": 0.6070872204415636, "grad_norm": 1.6852772235870361, "learning_rate": 7.061342881981105e-05, "loss": 1.4176, "step": 16952 }, { "epoch": 0.6071230325711319, "grad_norm": 1.2630378007888794, "learning_rate": 7.060234213909826e-05, "loss": 1.5105, "step": 16953 }, { "epoch": 0.6071588447007001, "grad_norm": 1.6143516302108765, "learning_rate": 7.059125585389306e-05, "loss": 1.5111, "step": 16954 }, { "epoch": 0.6071946568302684, "grad_norm": 1.6906598806381226, "learning_rate": 7.058016996434455e-05, "loss": 1.713, "step": 16955 }, { "epoch": 0.6072304689598367, "grad_norm": 1.9878149032592773, "learning_rate": 7.056908447060195e-05, "loss": 1.4993, "step": 16956 }, { "epoch": 0.607266281089405, "grad_norm": 1.3433419466018677, "learning_rate": 7.055799937281432e-05, "loss": 1.6973, "step": 16957 }, { "epoch": 0.6073020932189732, "grad_norm": 1.7787431478500366, "learning_rate": 7.054691467113085e-05, "loss": 1.4692, "step": 16958 }, { "epoch": 0.6073379053485416, "grad_norm": 1.5732979774475098, "learning_rate": 7.053583036570064e-05, "loss": 1.3024, "step": 16959 }, { "epoch": 0.6073737174781099, "grad_norm": 2.0106754302978516, "learning_rate": 7.052474645667283e-05, "loss": 1.637, "step": 16960 }, { "epoch": 0.6074095296076781, "grad_norm": 1.5223397016525269, "learning_rate": 7.051366294419655e-05, "loss": 1.3299, "step": 16961 }, { "epoch": 0.6074453417372464, "grad_norm": 1.6376421451568604, "learning_rate": 7.050257982842088e-05, "loss": 1.5557, "step": 16962 }, { "epoch": 0.6074811538668147, "grad_norm": 1.5301560163497925, "learning_rate": 7.049149710949497e-05, "loss": 1.5164, "step": 16963 }, { "epoch": 0.6075169659963829, "grad_norm": 1.3848516941070557, "learning_rate": 7.048041478756786e-05, "loss": 1.5645, "step": 16964 }, { "epoch": 0.6075527781259512, "grad_norm": 2.091106653213501, "learning_rate": 7.046933286278874e-05, "loss": 1.2488, "step": 16965 }, { "epoch": 0.6075885902555196, "grad_norm": 1.3417564630508423, "learning_rate": 7.04582513353066e-05, "loss": 1.3523, "step": 16966 }, { "epoch": 0.6076244023850879, "grad_norm": 1.6278449296951294, "learning_rate": 7.044717020527065e-05, "loss": 1.6019, "step": 16967 }, { "epoch": 0.6076602145146561, "grad_norm": 1.618781566619873, "learning_rate": 7.043608947282985e-05, "loss": 1.5129, "step": 16968 }, { "epoch": 0.6076960266442244, "grad_norm": 1.8264074325561523, "learning_rate": 7.042500913813337e-05, "loss": 1.6284, "step": 16969 }, { "epoch": 0.6077318387737927, "grad_norm": 1.528173565864563, "learning_rate": 7.041392920133024e-05, "loss": 1.384, "step": 16970 }, { "epoch": 0.6077676509033609, "grad_norm": 1.6601237058639526, "learning_rate": 7.040284966256949e-05, "loss": 1.5068, "step": 16971 }, { "epoch": 0.6078034630329292, "grad_norm": 1.6839256286621094, "learning_rate": 7.039177052200026e-05, "loss": 1.4348, "step": 16972 }, { "epoch": 0.6078392751624976, "grad_norm": 1.93649160861969, "learning_rate": 7.038069177977153e-05, "loss": 1.2951, "step": 16973 }, { "epoch": 0.6078750872920659, "grad_norm": 2.453674077987671, "learning_rate": 7.036961343603243e-05, "loss": 1.6723, "step": 16974 }, { "epoch": 0.6079108994216341, "grad_norm": 1.814399242401123, "learning_rate": 7.035853549093192e-05, "loss": 1.5456, "step": 16975 }, { "epoch": 0.6079467115512024, "grad_norm": 1.8552604913711548, "learning_rate": 7.034745794461912e-05, "loss": 1.6286, "step": 16976 }, { "epoch": 0.6079825236807707, "grad_norm": 1.676199197769165, "learning_rate": 7.033638079724298e-05, "loss": 1.4652, "step": 16977 }, { "epoch": 0.6080183358103389, "grad_norm": 2.279174566268921, "learning_rate": 7.032530404895262e-05, "loss": 1.6084, "step": 16978 }, { "epoch": 0.6080541479399072, "grad_norm": 1.4861416816711426, "learning_rate": 7.0314227699897e-05, "loss": 1.3948, "step": 16979 }, { "epoch": 0.6080899600694756, "grad_norm": 1.480503797531128, "learning_rate": 7.030315175022513e-05, "loss": 1.322, "step": 16980 }, { "epoch": 0.6081257721990438, "grad_norm": 1.4660117626190186, "learning_rate": 7.029207620008606e-05, "loss": 1.6761, "step": 16981 }, { "epoch": 0.6081615843286121, "grad_norm": 1.395601749420166, "learning_rate": 7.028100104962878e-05, "loss": 1.3519, "step": 16982 }, { "epoch": 0.6081973964581804, "grad_norm": 1.7928545475006104, "learning_rate": 7.026992629900232e-05, "loss": 1.4069, "step": 16983 }, { "epoch": 0.6082332085877487, "grad_norm": 1.6177374124526978, "learning_rate": 7.025885194835562e-05, "loss": 1.3573, "step": 16984 }, { "epoch": 0.6082690207173169, "grad_norm": 2.3778645992279053, "learning_rate": 7.024777799783774e-05, "loss": 1.6212, "step": 16985 }, { "epoch": 0.6083048328468852, "grad_norm": 1.5643190145492554, "learning_rate": 7.02367044475976e-05, "loss": 1.2325, "step": 16986 }, { "epoch": 0.6083406449764536, "grad_norm": 1.7111154794692993, "learning_rate": 7.022563129778422e-05, "loss": 1.6208, "step": 16987 }, { "epoch": 0.6083764571060218, "grad_norm": 2.059918165206909, "learning_rate": 7.021455854854657e-05, "loss": 1.5891, "step": 16988 }, { "epoch": 0.6084122692355901, "grad_norm": 1.2656056880950928, "learning_rate": 7.020348620003361e-05, "loss": 1.5974, "step": 16989 }, { "epoch": 0.6084480813651584, "grad_norm": 1.4218404293060303, "learning_rate": 7.019241425239432e-05, "loss": 1.4523, "step": 16990 }, { "epoch": 0.6084838934947266, "grad_norm": 1.8264952898025513, "learning_rate": 7.018134270577761e-05, "loss": 1.4599, "step": 16991 }, { "epoch": 0.6085197056242949, "grad_norm": 1.4522677659988403, "learning_rate": 7.017027156033252e-05, "loss": 1.7525, "step": 16992 }, { "epoch": 0.6085555177538632, "grad_norm": 1.762906789779663, "learning_rate": 7.01592008162079e-05, "loss": 1.3414, "step": 16993 }, { "epoch": 0.6085913298834316, "grad_norm": 1.9561679363250732, "learning_rate": 7.014813047355277e-05, "loss": 1.4562, "step": 16994 }, { "epoch": 0.6086271420129998, "grad_norm": 2.302292823791504, "learning_rate": 7.013706053251603e-05, "loss": 1.5998, "step": 16995 }, { "epoch": 0.6086629541425681, "grad_norm": 1.6201292276382446, "learning_rate": 7.012599099324662e-05, "loss": 1.3422, "step": 16996 }, { "epoch": 0.6086987662721364, "grad_norm": 1.6970531940460205, "learning_rate": 7.011492185589349e-05, "loss": 1.5323, "step": 16997 }, { "epoch": 0.6087345784017046, "grad_norm": 1.6018362045288086, "learning_rate": 7.01038531206055e-05, "loss": 1.5924, "step": 16998 }, { "epoch": 0.6087703905312729, "grad_norm": 1.6218585968017578, "learning_rate": 7.009278478753162e-05, "loss": 1.3095, "step": 16999 }, { "epoch": 0.6088062026608412, "grad_norm": 1.3571292161941528, "learning_rate": 7.008171685682074e-05, "loss": 1.1314, "step": 17000 }, { "epoch": 0.6088420147904096, "grad_norm": 2.037933588027954, "learning_rate": 7.007064932862178e-05, "loss": 1.467, "step": 17001 }, { "epoch": 0.6088778269199778, "grad_norm": 1.4938690662384033, "learning_rate": 7.005958220308362e-05, "loss": 1.5368, "step": 17002 }, { "epoch": 0.6089136390495461, "grad_norm": 2.2486536502838135, "learning_rate": 7.004851548035516e-05, "loss": 1.234, "step": 17003 }, { "epoch": 0.6089494511791144, "grad_norm": 1.8166847229003906, "learning_rate": 7.003744916058528e-05, "loss": 1.2638, "step": 17004 }, { "epoch": 0.6089852633086826, "grad_norm": 1.850803017616272, "learning_rate": 7.00263832439229e-05, "loss": 1.3371, "step": 17005 }, { "epoch": 0.6090210754382509, "grad_norm": 2.1468968391418457, "learning_rate": 7.001531773051688e-05, "loss": 1.1834, "step": 17006 }, { "epoch": 0.6090568875678192, "grad_norm": 1.7560291290283203, "learning_rate": 7.000425262051602e-05, "loss": 1.7774, "step": 17007 }, { "epoch": 0.6090926996973876, "grad_norm": 1.929193139076233, "learning_rate": 6.999318791406931e-05, "loss": 1.3495, "step": 17008 }, { "epoch": 0.6091285118269558, "grad_norm": 1.4060529470443726, "learning_rate": 6.998212361132549e-05, "loss": 1.3532, "step": 17009 }, { "epoch": 0.6091643239565241, "grad_norm": 1.2726495265960693, "learning_rate": 6.997105971243352e-05, "loss": 1.5055, "step": 17010 }, { "epoch": 0.6092001360860924, "grad_norm": 1.7176103591918945, "learning_rate": 6.995999621754219e-05, "loss": 1.3575, "step": 17011 }, { "epoch": 0.6092359482156606, "grad_norm": 1.3885085582733154, "learning_rate": 6.994893312680037e-05, "loss": 1.5756, "step": 17012 }, { "epoch": 0.6092717603452289, "grad_norm": 1.7695868015289307, "learning_rate": 6.99378704403569e-05, "loss": 1.5509, "step": 17013 }, { "epoch": 0.6093075724747972, "grad_norm": 1.2675243616104126, "learning_rate": 6.99268081583606e-05, "loss": 1.6999, "step": 17014 }, { "epoch": 0.6093433846043655, "grad_norm": 1.3948224782943726, "learning_rate": 6.991574628096033e-05, "loss": 1.4275, "step": 17015 }, { "epoch": 0.6093791967339338, "grad_norm": 2.945615530014038, "learning_rate": 6.990468480830482e-05, "loss": 1.319, "step": 17016 }, { "epoch": 0.6094150088635021, "grad_norm": 2.0076494216918945, "learning_rate": 6.989362374054302e-05, "loss": 1.4243, "step": 17017 }, { "epoch": 0.6094508209930704, "grad_norm": 2.0113306045532227, "learning_rate": 6.988256307782363e-05, "loss": 1.3713, "step": 17018 }, { "epoch": 0.6094866331226386, "grad_norm": 1.3602279424667358, "learning_rate": 6.987150282029555e-05, "loss": 1.3158, "step": 17019 }, { "epoch": 0.6095224452522069, "grad_norm": 2.0649795532226562, "learning_rate": 6.986044296810749e-05, "loss": 1.5799, "step": 17020 }, { "epoch": 0.6095582573817752, "grad_norm": 1.709782600402832, "learning_rate": 6.984938352140835e-05, "loss": 1.7156, "step": 17021 }, { "epoch": 0.6095940695113435, "grad_norm": 1.9718817472457886, "learning_rate": 6.983832448034684e-05, "loss": 1.3973, "step": 17022 }, { "epoch": 0.6096298816409118, "grad_norm": 1.8555179834365845, "learning_rate": 6.982726584507173e-05, "loss": 1.3785, "step": 17023 }, { "epoch": 0.6096656937704801, "grad_norm": 1.518578052520752, "learning_rate": 6.981620761573188e-05, "loss": 1.5094, "step": 17024 }, { "epoch": 0.6097015059000483, "grad_norm": 1.679343342781067, "learning_rate": 6.980514979247599e-05, "loss": 1.6682, "step": 17025 }, { "epoch": 0.6097373180296166, "grad_norm": 1.5894368886947632, "learning_rate": 6.979409237545291e-05, "loss": 1.3674, "step": 17026 }, { "epoch": 0.6097731301591849, "grad_norm": 1.8799184560775757, "learning_rate": 6.97830353648113e-05, "loss": 1.6178, "step": 17027 }, { "epoch": 0.6098089422887532, "grad_norm": 2.117532968521118, "learning_rate": 6.977197876070003e-05, "loss": 1.3589, "step": 17028 }, { "epoch": 0.6098447544183215, "grad_norm": 1.57524573802948, "learning_rate": 6.976092256326772e-05, "loss": 1.3024, "step": 17029 }, { "epoch": 0.6098805665478898, "grad_norm": 1.9387460947036743, "learning_rate": 6.974986677266326e-05, "loss": 1.4434, "step": 17030 }, { "epoch": 0.6099163786774581, "grad_norm": 2.020622730255127, "learning_rate": 6.973881138903531e-05, "loss": 1.5545, "step": 17031 }, { "epoch": 0.6099521908070263, "grad_norm": 1.553113341331482, "learning_rate": 6.972775641253259e-05, "loss": 1.2865, "step": 17032 }, { "epoch": 0.6099880029365946, "grad_norm": 1.6942930221557617, "learning_rate": 6.971670184330389e-05, "loss": 1.4666, "step": 17033 }, { "epoch": 0.6100238150661629, "grad_norm": 1.5093879699707031, "learning_rate": 6.970564768149788e-05, "loss": 1.3654, "step": 17034 }, { "epoch": 0.6100596271957311, "grad_norm": 1.5208278894424438, "learning_rate": 6.969459392726331e-05, "loss": 1.4862, "step": 17035 }, { "epoch": 0.6100954393252995, "grad_norm": 2.0230493545532227, "learning_rate": 6.968354058074887e-05, "loss": 1.2142, "step": 17036 }, { "epoch": 0.6101312514548678, "grad_norm": 1.8557683229446411, "learning_rate": 6.967248764210333e-05, "loss": 1.2707, "step": 17037 }, { "epoch": 0.6101670635844361, "grad_norm": 2.024885654449463, "learning_rate": 6.966143511147529e-05, "loss": 1.4075, "step": 17038 }, { "epoch": 0.6102028757140043, "grad_norm": 1.9817204475402832, "learning_rate": 6.965038298901356e-05, "loss": 1.39, "step": 17039 }, { "epoch": 0.6102386878435726, "grad_norm": 3.7637715339660645, "learning_rate": 6.963933127486677e-05, "loss": 1.4094, "step": 17040 }, { "epoch": 0.6102744999731409, "grad_norm": 1.5731815099716187, "learning_rate": 6.96282799691836e-05, "loss": 1.4704, "step": 17041 }, { "epoch": 0.6103103121027091, "grad_norm": 1.9939146041870117, "learning_rate": 6.961722907211277e-05, "loss": 1.4589, "step": 17042 }, { "epoch": 0.6103461242322775, "grad_norm": 1.8387165069580078, "learning_rate": 6.96061785838029e-05, "loss": 1.3256, "step": 17043 }, { "epoch": 0.6103819363618458, "grad_norm": 1.8265881538391113, "learning_rate": 6.95951285044027e-05, "loss": 1.5793, "step": 17044 }, { "epoch": 0.6104177484914141, "grad_norm": 1.364792823791504, "learning_rate": 6.958407883406082e-05, "loss": 1.5501, "step": 17045 }, { "epoch": 0.6104535606209823, "grad_norm": 1.4304333925247192, "learning_rate": 6.957302957292596e-05, "loss": 1.3732, "step": 17046 }, { "epoch": 0.6104893727505506, "grad_norm": 1.5327463150024414, "learning_rate": 6.956198072114669e-05, "loss": 1.3424, "step": 17047 }, { "epoch": 0.6105251848801189, "grad_norm": 1.5442783832550049, "learning_rate": 6.955093227887175e-05, "loss": 1.3564, "step": 17048 }, { "epoch": 0.6105609970096871, "grad_norm": 1.7592949867248535, "learning_rate": 6.953988424624973e-05, "loss": 1.5495, "step": 17049 }, { "epoch": 0.6105968091392555, "grad_norm": 1.6681785583496094, "learning_rate": 6.952883662342926e-05, "loss": 1.3294, "step": 17050 }, { "epoch": 0.6106326212688238, "grad_norm": 2.178701639175415, "learning_rate": 6.9517789410559e-05, "loss": 1.6474, "step": 17051 }, { "epoch": 0.610668433398392, "grad_norm": 1.7608951330184937, "learning_rate": 6.950674260778755e-05, "loss": 1.4254, "step": 17052 }, { "epoch": 0.6107042455279603, "grad_norm": 1.4168155193328857, "learning_rate": 6.949569621526357e-05, "loss": 1.4225, "step": 17053 }, { "epoch": 0.6107400576575286, "grad_norm": 1.6277146339416504, "learning_rate": 6.948465023313562e-05, "loss": 1.4284, "step": 17054 }, { "epoch": 0.6107758697870969, "grad_norm": 1.4581594467163086, "learning_rate": 6.947360466155237e-05, "loss": 1.4763, "step": 17055 }, { "epoch": 0.6108116819166651, "grad_norm": 2.682927370071411, "learning_rate": 6.946255950066236e-05, "loss": 1.6454, "step": 17056 }, { "epoch": 0.6108474940462335, "grad_norm": 1.8958370685577393, "learning_rate": 6.945151475061425e-05, "loss": 1.5278, "step": 17057 }, { "epoch": 0.6108833061758018, "grad_norm": 2.073317289352417, "learning_rate": 6.944047041155662e-05, "loss": 1.7897, "step": 17058 }, { "epoch": 0.61091911830537, "grad_norm": 1.4627833366394043, "learning_rate": 6.9429426483638e-05, "loss": 1.3666, "step": 17059 }, { "epoch": 0.6109549304349383, "grad_norm": 1.4396252632141113, "learning_rate": 6.941838296700703e-05, "loss": 1.3275, "step": 17060 }, { "epoch": 0.6109907425645066, "grad_norm": 1.7097407579421997, "learning_rate": 6.940733986181226e-05, "loss": 1.4636, "step": 17061 }, { "epoch": 0.6110265546940749, "grad_norm": 2.193366765975952, "learning_rate": 6.939629716820229e-05, "loss": 1.3695, "step": 17062 }, { "epoch": 0.6110623668236431, "grad_norm": 1.5271308422088623, "learning_rate": 6.938525488632563e-05, "loss": 1.4105, "step": 17063 }, { "epoch": 0.6110981789532115, "grad_norm": 1.4365044832229614, "learning_rate": 6.937421301633091e-05, "loss": 1.2441, "step": 17064 }, { "epoch": 0.6111339910827798, "grad_norm": 2.1028079986572266, "learning_rate": 6.936317155836664e-05, "loss": 1.3424, "step": 17065 }, { "epoch": 0.611169803212348, "grad_norm": 1.867521047592163, "learning_rate": 6.935213051258138e-05, "loss": 1.3437, "step": 17066 }, { "epoch": 0.6112056153419163, "grad_norm": 1.709039330482483, "learning_rate": 6.934108987912369e-05, "loss": 1.5545, "step": 17067 }, { "epoch": 0.6112414274714846, "grad_norm": 1.8121676445007324, "learning_rate": 6.933004965814205e-05, "loss": 1.7838, "step": 17068 }, { "epoch": 0.6112772396010528, "grad_norm": 1.734595537185669, "learning_rate": 6.931900984978506e-05, "loss": 1.3001, "step": 17069 }, { "epoch": 0.6113130517306211, "grad_norm": 1.3379672765731812, "learning_rate": 6.930797045420119e-05, "loss": 1.4297, "step": 17070 }, { "epoch": 0.6113488638601895, "grad_norm": 1.8500645160675049, "learning_rate": 6.929693147153902e-05, "loss": 1.3034, "step": 17071 }, { "epoch": 0.6113846759897578, "grad_norm": 1.3984006643295288, "learning_rate": 6.9285892901947e-05, "loss": 1.4553, "step": 17072 }, { "epoch": 0.611420488119326, "grad_norm": 1.7027636766433716, "learning_rate": 6.927485474557369e-05, "loss": 1.6593, "step": 17073 }, { "epoch": 0.6114563002488943, "grad_norm": 1.6550413370132446, "learning_rate": 6.926381700256757e-05, "loss": 1.5719, "step": 17074 }, { "epoch": 0.6114921123784626, "grad_norm": 1.6176135540008545, "learning_rate": 6.925277967307717e-05, "loss": 1.7674, "step": 17075 }, { "epoch": 0.6115279245080308, "grad_norm": 1.5670104026794434, "learning_rate": 6.924174275725094e-05, "loss": 1.5544, "step": 17076 }, { "epoch": 0.6115637366375991, "grad_norm": 1.4483779668807983, "learning_rate": 6.923070625523737e-05, "loss": 1.6666, "step": 17077 }, { "epoch": 0.6115995487671675, "grad_norm": 1.5518686771392822, "learning_rate": 6.921967016718499e-05, "loss": 1.15, "step": 17078 }, { "epoch": 0.6116353608967358, "grad_norm": 1.4891283512115479, "learning_rate": 6.920863449324221e-05, "loss": 1.4193, "step": 17079 }, { "epoch": 0.611671173026304, "grad_norm": 1.8726311922073364, "learning_rate": 6.919759923355756e-05, "loss": 1.4929, "step": 17080 }, { "epoch": 0.6117069851558723, "grad_norm": 2.429152488708496, "learning_rate": 6.918656438827946e-05, "loss": 1.3696, "step": 17081 }, { "epoch": 0.6117427972854406, "grad_norm": 1.599990725517273, "learning_rate": 6.917552995755641e-05, "loss": 1.4851, "step": 17082 }, { "epoch": 0.6117786094150088, "grad_norm": 1.8051741123199463, "learning_rate": 6.916449594153682e-05, "loss": 1.43, "step": 17083 }, { "epoch": 0.6118144215445771, "grad_norm": 1.5804451704025269, "learning_rate": 6.915346234036919e-05, "loss": 0.9616, "step": 17084 }, { "epoch": 0.6118502336741455, "grad_norm": 1.3752074241638184, "learning_rate": 6.914242915420193e-05, "loss": 1.2154, "step": 17085 }, { "epoch": 0.6118860458037138, "grad_norm": 1.573339581489563, "learning_rate": 6.913139638318346e-05, "loss": 1.3455, "step": 17086 }, { "epoch": 0.611921857933282, "grad_norm": 1.6640084981918335, "learning_rate": 6.912036402746227e-05, "loss": 1.3296, "step": 17087 }, { "epoch": 0.6119576700628503, "grad_norm": 1.7510104179382324, "learning_rate": 6.910933208718671e-05, "loss": 1.3694, "step": 17088 }, { "epoch": 0.6119934821924186, "grad_norm": 1.2308443784713745, "learning_rate": 6.909830056250527e-05, "loss": 1.4283, "step": 17089 }, { "epoch": 0.6120292943219868, "grad_norm": 1.7887136936187744, "learning_rate": 6.908726945356632e-05, "loss": 1.4801, "step": 17090 }, { "epoch": 0.6120651064515551, "grad_norm": 1.6139098405838013, "learning_rate": 6.90762387605183e-05, "loss": 1.6002, "step": 17091 }, { "epoch": 0.6121009185811235, "grad_norm": 1.8948484659194946, "learning_rate": 6.906520848350957e-05, "loss": 1.4782, "step": 17092 }, { "epoch": 0.6121367307106917, "grad_norm": 1.8423246145248413, "learning_rate": 6.905417862268859e-05, "loss": 1.4955, "step": 17093 }, { "epoch": 0.61217254284026, "grad_norm": 1.6913235187530518, "learning_rate": 6.904314917820371e-05, "loss": 1.3755, "step": 17094 }, { "epoch": 0.6122083549698283, "grad_norm": 1.9365330934524536, "learning_rate": 6.90321201502033e-05, "loss": 1.5667, "step": 17095 }, { "epoch": 0.6122441670993966, "grad_norm": 2.045860767364502, "learning_rate": 6.90210915388358e-05, "loss": 1.4949, "step": 17096 }, { "epoch": 0.6122799792289648, "grad_norm": 1.4850900173187256, "learning_rate": 6.901006334424953e-05, "loss": 1.2754, "step": 17097 }, { "epoch": 0.6123157913585331, "grad_norm": 1.5501368045806885, "learning_rate": 6.89990355665929e-05, "loss": 1.3983, "step": 17098 }, { "epoch": 0.6123516034881015, "grad_norm": 1.471760630607605, "learning_rate": 6.898800820601425e-05, "loss": 1.4922, "step": 17099 }, { "epoch": 0.6123874156176697, "grad_norm": 1.7545149326324463, "learning_rate": 6.897698126266197e-05, "loss": 1.83, "step": 17100 }, { "epoch": 0.612423227747238, "grad_norm": 1.2826873064041138, "learning_rate": 6.896595473668435e-05, "loss": 1.589, "step": 17101 }, { "epoch": 0.6124590398768063, "grad_norm": 1.2966712713241577, "learning_rate": 6.89549286282298e-05, "loss": 1.1177, "step": 17102 }, { "epoch": 0.6124948520063745, "grad_norm": 1.3103042840957642, "learning_rate": 6.894390293744668e-05, "loss": 1.4203, "step": 17103 }, { "epoch": 0.6125306641359428, "grad_norm": 1.613857388496399, "learning_rate": 6.893287766448321e-05, "loss": 1.4521, "step": 17104 }, { "epoch": 0.6125664762655111, "grad_norm": 1.5288715362548828, "learning_rate": 6.892185280948786e-05, "loss": 1.3655, "step": 17105 }, { "epoch": 0.6126022883950795, "grad_norm": 1.2962150573730469, "learning_rate": 6.891082837260885e-05, "loss": 1.3534, "step": 17106 }, { "epoch": 0.6126381005246477, "grad_norm": 1.786429524421692, "learning_rate": 6.889980435399456e-05, "loss": 1.8125, "step": 17107 }, { "epoch": 0.612673912654216, "grad_norm": 1.44051194190979, "learning_rate": 6.888878075379326e-05, "loss": 1.4555, "step": 17108 }, { "epoch": 0.6127097247837843, "grad_norm": 1.836295485496521, "learning_rate": 6.887775757215334e-05, "loss": 1.7671, "step": 17109 }, { "epoch": 0.6127455369133525, "grad_norm": 1.9834587574005127, "learning_rate": 6.886673480922299e-05, "loss": 1.5618, "step": 17110 }, { "epoch": 0.6127813490429208, "grad_norm": 1.558998942375183, "learning_rate": 6.88557124651506e-05, "loss": 1.4063, "step": 17111 }, { "epoch": 0.6128171611724891, "grad_norm": 1.5703425407409668, "learning_rate": 6.884469054008444e-05, "loss": 1.3246, "step": 17112 }, { "epoch": 0.6128529733020575, "grad_norm": 1.665521264076233, "learning_rate": 6.883366903417273e-05, "loss": 1.3784, "step": 17113 }, { "epoch": 0.6128887854316257, "grad_norm": 1.5847508907318115, "learning_rate": 6.882264794756386e-05, "loss": 1.3142, "step": 17114 }, { "epoch": 0.612924597561194, "grad_norm": 1.8532623052597046, "learning_rate": 6.881162728040598e-05, "loss": 1.4714, "step": 17115 }, { "epoch": 0.6129604096907623, "grad_norm": 1.7318142652511597, "learning_rate": 6.880060703284748e-05, "loss": 1.6911, "step": 17116 }, { "epoch": 0.6129962218203305, "grad_norm": 1.7050365209579468, "learning_rate": 6.878958720503652e-05, "loss": 1.7391, "step": 17117 }, { "epoch": 0.6130320339498988, "grad_norm": 1.766896367073059, "learning_rate": 6.877856779712147e-05, "loss": 1.4107, "step": 17118 }, { "epoch": 0.6130678460794671, "grad_norm": 1.5562337636947632, "learning_rate": 6.876754880925049e-05, "loss": 1.4708, "step": 17119 }, { "epoch": 0.6131036582090355, "grad_norm": 1.6931556463241577, "learning_rate": 6.87565302415718e-05, "loss": 1.5617, "step": 17120 }, { "epoch": 0.6131394703386037, "grad_norm": 1.5268492698669434, "learning_rate": 6.874551209423376e-05, "loss": 1.5087, "step": 17121 }, { "epoch": 0.613175282468172, "grad_norm": 1.6822506189346313, "learning_rate": 6.873449436738451e-05, "loss": 1.4391, "step": 17122 }, { "epoch": 0.6132110945977403, "grad_norm": 1.6287306547164917, "learning_rate": 6.872347706117233e-05, "loss": 1.4453, "step": 17123 }, { "epoch": 0.6132469067273085, "grad_norm": 1.3523920774459839, "learning_rate": 6.871246017574537e-05, "loss": 1.4051, "step": 17124 }, { "epoch": 0.6132827188568768, "grad_norm": 2.1489264965057373, "learning_rate": 6.870144371125198e-05, "loss": 1.2533, "step": 17125 }, { "epoch": 0.6133185309864451, "grad_norm": 1.7597583532333374, "learning_rate": 6.869042766784022e-05, "loss": 1.439, "step": 17126 }, { "epoch": 0.6133543431160134, "grad_norm": 1.3666061162948608, "learning_rate": 6.867941204565843e-05, "loss": 1.1421, "step": 17127 }, { "epoch": 0.6133901552455817, "grad_norm": 1.6297531127929688, "learning_rate": 6.866839684485473e-05, "loss": 1.4425, "step": 17128 }, { "epoch": 0.61342596737515, "grad_norm": 1.778544545173645, "learning_rate": 6.865738206557731e-05, "loss": 1.3258, "step": 17129 }, { "epoch": 0.6134617795047183, "grad_norm": 1.8034090995788574, "learning_rate": 6.864636770797441e-05, "loss": 1.5363, "step": 17130 }, { "epoch": 0.6134975916342865, "grad_norm": 1.9214576482772827, "learning_rate": 6.863535377219417e-05, "loss": 1.5098, "step": 17131 }, { "epoch": 0.6135334037638548, "grad_norm": 1.9046701192855835, "learning_rate": 6.862434025838481e-05, "loss": 1.6811, "step": 17132 }, { "epoch": 0.6135692158934231, "grad_norm": 2.123274087905884, "learning_rate": 6.861332716669444e-05, "loss": 1.6217, "step": 17133 }, { "epoch": 0.6136050280229914, "grad_norm": 1.7774008512496948, "learning_rate": 6.860231449727133e-05, "loss": 1.5144, "step": 17134 }, { "epoch": 0.6136408401525597, "grad_norm": 1.6106023788452148, "learning_rate": 6.859130225026351e-05, "loss": 1.425, "step": 17135 }, { "epoch": 0.613676652282128, "grad_norm": 1.860154628753662, "learning_rate": 6.858029042581926e-05, "loss": 1.1347, "step": 17136 }, { "epoch": 0.6137124644116962, "grad_norm": 1.433030605316162, "learning_rate": 6.856927902408666e-05, "loss": 1.6064, "step": 17137 }, { "epoch": 0.6137482765412645, "grad_norm": 1.4676074981689453, "learning_rate": 6.855826804521386e-05, "loss": 1.3863, "step": 17138 }, { "epoch": 0.6137840886708328, "grad_norm": 1.465326189994812, "learning_rate": 6.854725748934901e-05, "loss": 1.6177, "step": 17139 }, { "epoch": 0.613819900800401, "grad_norm": 2.1251637935638428, "learning_rate": 6.853624735664021e-05, "loss": 1.0243, "step": 17140 }, { "epoch": 0.6138557129299694, "grad_norm": 1.8610551357269287, "learning_rate": 6.852523764723566e-05, "loss": 1.7238, "step": 17141 }, { "epoch": 0.6138915250595377, "grad_norm": 1.2986605167388916, "learning_rate": 6.85142283612834e-05, "loss": 1.4475, "step": 17142 }, { "epoch": 0.613927337189106, "grad_norm": 2.99711537361145, "learning_rate": 6.850321949893162e-05, "loss": 1.3241, "step": 17143 }, { "epoch": 0.6139631493186742, "grad_norm": 1.5854588747024536, "learning_rate": 6.849221106032837e-05, "loss": 1.308, "step": 17144 }, { "epoch": 0.6139989614482425, "grad_norm": 1.4505645036697388, "learning_rate": 6.84812030456218e-05, "loss": 1.5152, "step": 17145 }, { "epoch": 0.6140347735778108, "grad_norm": 1.736111044883728, "learning_rate": 6.847019545495998e-05, "loss": 1.6171, "step": 17146 }, { "epoch": 0.614070585707379, "grad_norm": 1.6995787620544434, "learning_rate": 6.845918828849099e-05, "loss": 1.0826, "step": 17147 }, { "epoch": 0.6141063978369474, "grad_norm": 2.0281624794006348, "learning_rate": 6.844818154636295e-05, "loss": 1.5205, "step": 17148 }, { "epoch": 0.6141422099665157, "grad_norm": 1.5841419696807861, "learning_rate": 6.843717522872393e-05, "loss": 1.4293, "step": 17149 }, { "epoch": 0.614178022096084, "grad_norm": 1.547128677368164, "learning_rate": 6.8426169335722e-05, "loss": 1.4819, "step": 17150 }, { "epoch": 0.6142138342256522, "grad_norm": 1.5373615026474, "learning_rate": 6.841516386750523e-05, "loss": 1.326, "step": 17151 }, { "epoch": 0.6142496463552205, "grad_norm": 1.5892366170883179, "learning_rate": 6.84041588242217e-05, "loss": 1.772, "step": 17152 }, { "epoch": 0.6142854584847888, "grad_norm": 1.3406174182891846, "learning_rate": 6.839315420601943e-05, "loss": 1.4543, "step": 17153 }, { "epoch": 0.614321270614357, "grad_norm": 2.1202826499938965, "learning_rate": 6.838215001304654e-05, "loss": 1.5323, "step": 17154 }, { "epoch": 0.6143570827439254, "grad_norm": 2.2001073360443115, "learning_rate": 6.837114624545102e-05, "loss": 1.8068, "step": 17155 }, { "epoch": 0.6143928948734937, "grad_norm": 1.5223966836929321, "learning_rate": 6.836014290338093e-05, "loss": 1.5163, "step": 17156 }, { "epoch": 0.614428707003062, "grad_norm": 2.0496227741241455, "learning_rate": 6.834913998698432e-05, "loss": 1.7119, "step": 17157 }, { "epoch": 0.6144645191326302, "grad_norm": 1.7774089574813843, "learning_rate": 6.833813749640916e-05, "loss": 1.1044, "step": 17158 }, { "epoch": 0.6145003312621985, "grad_norm": 1.9635884761810303, "learning_rate": 6.832713543180356e-05, "loss": 1.1691, "step": 17159 }, { "epoch": 0.6145361433917668, "grad_norm": 1.539371371269226, "learning_rate": 6.831613379331547e-05, "loss": 1.3793, "step": 17160 }, { "epoch": 0.614571955521335, "grad_norm": 1.989688515663147, "learning_rate": 6.830513258109296e-05, "loss": 1.8035, "step": 17161 }, { "epoch": 0.6146077676509034, "grad_norm": 1.5166738033294678, "learning_rate": 6.829413179528398e-05, "loss": 1.2237, "step": 17162 }, { "epoch": 0.6146435797804717, "grad_norm": 1.495975375175476, "learning_rate": 6.828313143603657e-05, "loss": 1.1678, "step": 17163 }, { "epoch": 0.61467939191004, "grad_norm": 1.3702117204666138, "learning_rate": 6.827213150349874e-05, "loss": 1.5745, "step": 17164 }, { "epoch": 0.6147152040396082, "grad_norm": 1.3687939643859863, "learning_rate": 6.826113199781841e-05, "loss": 1.2165, "step": 17165 }, { "epoch": 0.6147510161691765, "grad_norm": 1.5471529960632324, "learning_rate": 6.825013291914363e-05, "loss": 1.4653, "step": 17166 }, { "epoch": 0.6147868282987448, "grad_norm": 1.3832335472106934, "learning_rate": 6.823913426762237e-05, "loss": 1.4773, "step": 17167 }, { "epoch": 0.614822640428313, "grad_norm": 1.4992239475250244, "learning_rate": 6.822813604340257e-05, "loss": 1.4386, "step": 17168 }, { "epoch": 0.6148584525578814, "grad_norm": 1.5408648252487183, "learning_rate": 6.821713824663221e-05, "loss": 1.4708, "step": 17169 }, { "epoch": 0.6148942646874497, "grad_norm": 1.663112998008728, "learning_rate": 6.820614087745929e-05, "loss": 1.4123, "step": 17170 }, { "epoch": 0.614930076817018, "grad_norm": 1.6703062057495117, "learning_rate": 6.81951439360317e-05, "loss": 1.288, "step": 17171 }, { "epoch": 0.6149658889465862, "grad_norm": 1.5483388900756836, "learning_rate": 6.818414742249745e-05, "loss": 1.3075, "step": 17172 }, { "epoch": 0.6150017010761545, "grad_norm": 1.4388359785079956, "learning_rate": 6.817315133700446e-05, "loss": 1.5018, "step": 17173 }, { "epoch": 0.6150375132057228, "grad_norm": 1.47565758228302, "learning_rate": 6.816215567970063e-05, "loss": 1.5788, "step": 17174 }, { "epoch": 0.615073325335291, "grad_norm": 1.9998646974563599, "learning_rate": 6.815116045073396e-05, "loss": 1.4184, "step": 17175 }, { "epoch": 0.6151091374648594, "grad_norm": 2.213521957397461, "learning_rate": 6.814016565025231e-05, "loss": 1.6169, "step": 17176 }, { "epoch": 0.6151449495944277, "grad_norm": 1.4681707620620728, "learning_rate": 6.812917127840368e-05, "loss": 1.568, "step": 17177 }, { "epoch": 0.6151807617239959, "grad_norm": 1.8381984233856201, "learning_rate": 6.81181773353359e-05, "loss": 1.657, "step": 17178 }, { "epoch": 0.6152165738535642, "grad_norm": 1.8551054000854492, "learning_rate": 6.810718382119694e-05, "loss": 1.4701, "step": 17179 }, { "epoch": 0.6152523859831325, "grad_norm": 1.313694953918457, "learning_rate": 6.809619073613467e-05, "loss": 1.3976, "step": 17180 }, { "epoch": 0.6152881981127007, "grad_norm": 2.456167697906494, "learning_rate": 6.808519808029703e-05, "loss": 1.3742, "step": 17181 }, { "epoch": 0.615324010242269, "grad_norm": 1.4624918699264526, "learning_rate": 6.807420585383186e-05, "loss": 1.5296, "step": 17182 }, { "epoch": 0.6153598223718374, "grad_norm": 1.544368863105774, "learning_rate": 6.806321405688707e-05, "loss": 1.6921, "step": 17183 }, { "epoch": 0.6153956345014057, "grad_norm": 1.5174061059951782, "learning_rate": 6.805222268961054e-05, "loss": 1.3493, "step": 17184 }, { "epoch": 0.6154314466309739, "grad_norm": 1.750654935836792, "learning_rate": 6.804123175215014e-05, "loss": 1.5409, "step": 17185 }, { "epoch": 0.6154672587605422, "grad_norm": 1.7481032609939575, "learning_rate": 6.803024124465375e-05, "loss": 1.4192, "step": 17186 }, { "epoch": 0.6155030708901105, "grad_norm": 1.7678956985473633, "learning_rate": 6.801925116726922e-05, "loss": 1.3979, "step": 17187 }, { "epoch": 0.6155388830196787, "grad_norm": 1.6800671815872192, "learning_rate": 6.800826152014442e-05, "loss": 1.277, "step": 17188 }, { "epoch": 0.615574695149247, "grad_norm": 2.4462714195251465, "learning_rate": 6.799727230342718e-05, "loss": 1.5407, "step": 17189 }, { "epoch": 0.6156105072788154, "grad_norm": 1.6587086915969849, "learning_rate": 6.798628351726539e-05, "loss": 1.3014, "step": 17190 }, { "epoch": 0.6156463194083837, "grad_norm": 1.3719878196716309, "learning_rate": 6.797529516180687e-05, "loss": 1.2125, "step": 17191 }, { "epoch": 0.6156821315379519, "grad_norm": 1.9094762802124023, "learning_rate": 6.796430723719939e-05, "loss": 1.2202, "step": 17192 }, { "epoch": 0.6157179436675202, "grad_norm": 1.774322271347046, "learning_rate": 6.795331974359088e-05, "loss": 1.7452, "step": 17193 }, { "epoch": 0.6157537557970885, "grad_norm": 2.135695695877075, "learning_rate": 6.794233268112907e-05, "loss": 1.7369, "step": 17194 }, { "epoch": 0.6157895679266567, "grad_norm": 1.5609972476959229, "learning_rate": 6.793134604996185e-05, "loss": 1.347, "step": 17195 }, { "epoch": 0.615825380056225, "grad_norm": 1.5753405094146729, "learning_rate": 6.7920359850237e-05, "loss": 1.3896, "step": 17196 }, { "epoch": 0.6158611921857934, "grad_norm": 2.917074203491211, "learning_rate": 6.790937408210233e-05, "loss": 1.528, "step": 17197 }, { "epoch": 0.6158970043153617, "grad_norm": 1.5047675371170044, "learning_rate": 6.789838874570565e-05, "loss": 1.5584, "step": 17198 }, { "epoch": 0.6159328164449299, "grad_norm": 1.545350193977356, "learning_rate": 6.788740384119472e-05, "loss": 1.5371, "step": 17199 }, { "epoch": 0.6159686285744982, "grad_norm": 1.9652788639068604, "learning_rate": 6.787641936871739e-05, "loss": 1.5017, "step": 17200 }, { "epoch": 0.6160044407040665, "grad_norm": 1.3209742307662964, "learning_rate": 6.786543532842133e-05, "loss": 1.1845, "step": 17201 }, { "epoch": 0.6160402528336347, "grad_norm": 2.2090561389923096, "learning_rate": 6.785445172045448e-05, "loss": 1.6238, "step": 17202 }, { "epoch": 0.616076064963203, "grad_norm": 1.6406116485595703, "learning_rate": 6.784346854496442e-05, "loss": 1.2969, "step": 17203 }, { "epoch": 0.6161118770927714, "grad_norm": 1.7043877840042114, "learning_rate": 6.78324858020991e-05, "loss": 1.5182, "step": 17204 }, { "epoch": 0.6161476892223396, "grad_norm": 1.4550445079803467, "learning_rate": 6.78215034920061e-05, "loss": 1.5474, "step": 17205 }, { "epoch": 0.6161835013519079, "grad_norm": 1.4933606386184692, "learning_rate": 6.781052161483332e-05, "loss": 1.3881, "step": 17206 }, { "epoch": 0.6162193134814762, "grad_norm": 1.5615988969802856, "learning_rate": 6.779954017072842e-05, "loss": 1.5827, "step": 17207 }, { "epoch": 0.6162551256110445, "grad_norm": 1.4446730613708496, "learning_rate": 6.778855915983921e-05, "loss": 1.1332, "step": 17208 }, { "epoch": 0.6162909377406127, "grad_norm": 1.8601797819137573, "learning_rate": 6.777757858231339e-05, "loss": 1.252, "step": 17209 }, { "epoch": 0.616326749870181, "grad_norm": 1.5915179252624512, "learning_rate": 6.776659843829863e-05, "loss": 1.4509, "step": 17210 }, { "epoch": 0.6163625619997494, "grad_norm": 1.6909068822860718, "learning_rate": 6.775561872794279e-05, "loss": 1.477, "step": 17211 }, { "epoch": 0.6163983741293176, "grad_norm": 1.5252338647842407, "learning_rate": 6.774463945139343e-05, "loss": 1.6273, "step": 17212 }, { "epoch": 0.6164341862588859, "grad_norm": 1.677097201347351, "learning_rate": 6.77336606087984e-05, "loss": 1.4723, "step": 17213 }, { "epoch": 0.6164699983884542, "grad_norm": 1.8449418544769287, "learning_rate": 6.772268220030528e-05, "loss": 1.201, "step": 17214 }, { "epoch": 0.6165058105180224, "grad_norm": 1.9632333517074585, "learning_rate": 6.77117042260619e-05, "loss": 1.6116, "step": 17215 }, { "epoch": 0.6165416226475907, "grad_norm": 1.7720052003860474, "learning_rate": 6.770072668621583e-05, "loss": 1.6009, "step": 17216 }, { "epoch": 0.616577434777159, "grad_norm": 2.2499513626098633, "learning_rate": 6.768974958091488e-05, "loss": 1.2514, "step": 17217 }, { "epoch": 0.6166132469067274, "grad_norm": 1.5738240480422974, "learning_rate": 6.767877291030666e-05, "loss": 1.6878, "step": 17218 }, { "epoch": 0.6166490590362956, "grad_norm": 1.6386752128601074, "learning_rate": 6.766779667453881e-05, "loss": 1.4406, "step": 17219 }, { "epoch": 0.6166848711658639, "grad_norm": 1.473636507987976, "learning_rate": 6.765682087375912e-05, "loss": 1.1257, "step": 17220 }, { "epoch": 0.6167206832954322, "grad_norm": 1.5793477296829224, "learning_rate": 6.764584550811512e-05, "loss": 1.3315, "step": 17221 }, { "epoch": 0.6167564954250004, "grad_norm": 2.4050142765045166, "learning_rate": 6.763487057775459e-05, "loss": 1.4098, "step": 17222 }, { "epoch": 0.6167923075545687, "grad_norm": 1.66704261302948, "learning_rate": 6.762389608282507e-05, "loss": 1.2545, "step": 17223 }, { "epoch": 0.616828119684137, "grad_norm": 1.397594928741455, "learning_rate": 6.761292202347434e-05, "loss": 1.5063, "step": 17224 }, { "epoch": 0.6168639318137054, "grad_norm": 1.3447819948196411, "learning_rate": 6.760194839984994e-05, "loss": 1.281, "step": 17225 }, { "epoch": 0.6168997439432736, "grad_norm": 1.6767772436141968, "learning_rate": 6.75909752120995e-05, "loss": 1.8086, "step": 17226 }, { "epoch": 0.6169355560728419, "grad_norm": 1.7301512956619263, "learning_rate": 6.758000246037072e-05, "loss": 1.8668, "step": 17227 }, { "epoch": 0.6169713682024102, "grad_norm": 1.7940595149993896, "learning_rate": 6.756903014481116e-05, "loss": 1.3394, "step": 17228 }, { "epoch": 0.6170071803319784, "grad_norm": 1.7685900926589966, "learning_rate": 6.75580582655685e-05, "loss": 1.7772, "step": 17229 }, { "epoch": 0.6170429924615467, "grad_norm": 1.6074085235595703, "learning_rate": 6.754708682279027e-05, "loss": 1.2769, "step": 17230 }, { "epoch": 0.617078804591115, "grad_norm": 2.404395580291748, "learning_rate": 6.753611581662418e-05, "loss": 1.546, "step": 17231 }, { "epoch": 0.6171146167206832, "grad_norm": 1.7004632949829102, "learning_rate": 6.752514524721771e-05, "loss": 1.705, "step": 17232 }, { "epoch": 0.6171504288502516, "grad_norm": 1.2684026956558228, "learning_rate": 6.751417511471859e-05, "loss": 1.5119, "step": 17233 }, { "epoch": 0.6171862409798199, "grad_norm": 1.4887030124664307, "learning_rate": 6.750320541927433e-05, "loss": 1.4895, "step": 17234 }, { "epoch": 0.6172220531093882, "grad_norm": 1.5183050632476807, "learning_rate": 6.749223616103249e-05, "loss": 1.4796, "step": 17235 }, { "epoch": 0.6172578652389564, "grad_norm": 1.4999533891677856, "learning_rate": 6.74812673401407e-05, "loss": 1.0856, "step": 17236 }, { "epoch": 0.6172936773685247, "grad_norm": 1.952275276184082, "learning_rate": 6.74702989567465e-05, "loss": 1.5083, "step": 17237 }, { "epoch": 0.617329489498093, "grad_norm": 1.649373173713684, "learning_rate": 6.745933101099748e-05, "loss": 1.5587, "step": 17238 }, { "epoch": 0.6173653016276612, "grad_norm": 1.4925177097320557, "learning_rate": 6.744836350304118e-05, "loss": 1.1892, "step": 17239 }, { "epoch": 0.6174011137572296, "grad_norm": 1.5347260236740112, "learning_rate": 6.743739643302516e-05, "loss": 1.4524, "step": 17240 }, { "epoch": 0.6174369258867979, "grad_norm": 1.3831895589828491, "learning_rate": 6.742642980109696e-05, "loss": 1.421, "step": 17241 }, { "epoch": 0.6174727380163662, "grad_norm": 2.0352489948272705, "learning_rate": 6.741546360740415e-05, "loss": 1.8222, "step": 17242 }, { "epoch": 0.6175085501459344, "grad_norm": 1.7592264413833618, "learning_rate": 6.740449785209425e-05, "loss": 1.3654, "step": 17243 }, { "epoch": 0.6175443622755027, "grad_norm": 1.4026323556900024, "learning_rate": 6.739353253531475e-05, "loss": 1.6612, "step": 17244 }, { "epoch": 0.617580174405071, "grad_norm": 2.095674991607666, "learning_rate": 6.738256765721324e-05, "loss": 1.3747, "step": 17245 }, { "epoch": 0.6176159865346392, "grad_norm": 1.5699025392532349, "learning_rate": 6.73716032179372e-05, "loss": 1.3018, "step": 17246 }, { "epoch": 0.6176517986642076, "grad_norm": 2.3074796199798584, "learning_rate": 6.736063921763415e-05, "loss": 1.4714, "step": 17247 }, { "epoch": 0.6176876107937759, "grad_norm": 1.499690294265747, "learning_rate": 6.73496756564516e-05, "loss": 1.3134, "step": 17248 }, { "epoch": 0.6177234229233441, "grad_norm": 1.9261491298675537, "learning_rate": 6.733871253453707e-05, "loss": 1.6609, "step": 17249 }, { "epoch": 0.6177592350529124, "grad_norm": 1.6872670650482178, "learning_rate": 6.7327749852038e-05, "loss": 1.2668, "step": 17250 }, { "epoch": 0.6177950471824807, "grad_norm": 1.8014341592788696, "learning_rate": 6.731678760910192e-05, "loss": 1.5728, "step": 17251 }, { "epoch": 0.617830859312049, "grad_norm": 1.621647596359253, "learning_rate": 6.730582580587632e-05, "loss": 1.3942, "step": 17252 }, { "epoch": 0.6178666714416172, "grad_norm": 1.6194264888763428, "learning_rate": 6.729486444250863e-05, "loss": 1.5561, "step": 17253 }, { "epoch": 0.6179024835711856, "grad_norm": 1.5640456676483154, "learning_rate": 6.72839035191464e-05, "loss": 1.169, "step": 17254 }, { "epoch": 0.6179382957007539, "grad_norm": 1.567991018295288, "learning_rate": 6.7272943035937e-05, "loss": 1.6388, "step": 17255 }, { "epoch": 0.6179741078303221, "grad_norm": 1.513309121131897, "learning_rate": 6.726198299302796e-05, "loss": 1.5298, "step": 17256 }, { "epoch": 0.6180099199598904, "grad_norm": 1.6666152477264404, "learning_rate": 6.72510233905667e-05, "loss": 1.3742, "step": 17257 }, { "epoch": 0.6180457320894587, "grad_norm": 1.8331990242004395, "learning_rate": 6.724006422870069e-05, "loss": 1.3625, "step": 17258 }, { "epoch": 0.618081544219027, "grad_norm": 1.3519563674926758, "learning_rate": 6.722910550757734e-05, "loss": 1.5429, "step": 17259 }, { "epoch": 0.6181173563485952, "grad_norm": 1.70087730884552, "learning_rate": 6.721814722734412e-05, "loss": 1.2871, "step": 17260 }, { "epoch": 0.6181531684781636, "grad_norm": 1.6571663618087769, "learning_rate": 6.720718938814846e-05, "loss": 1.4969, "step": 17261 }, { "epoch": 0.6181889806077319, "grad_norm": 1.8243577480316162, "learning_rate": 6.719623199013771e-05, "loss": 1.343, "step": 17262 }, { "epoch": 0.6182247927373001, "grad_norm": 1.3904130458831787, "learning_rate": 6.718527503345939e-05, "loss": 1.3714, "step": 17263 }, { "epoch": 0.6182606048668684, "grad_norm": 1.2243188619613647, "learning_rate": 6.717431851826086e-05, "loss": 1.0458, "step": 17264 }, { "epoch": 0.6182964169964367, "grad_norm": 1.2935394048690796, "learning_rate": 6.716336244468954e-05, "loss": 1.5686, "step": 17265 }, { "epoch": 0.6183322291260049, "grad_norm": 1.9957224130630493, "learning_rate": 6.715240681289279e-05, "loss": 1.585, "step": 17266 }, { "epoch": 0.6183680412555732, "grad_norm": 1.5544694662094116, "learning_rate": 6.714145162301808e-05, "loss": 1.2735, "step": 17267 }, { "epoch": 0.6184038533851416, "grad_norm": 1.6578425168991089, "learning_rate": 6.713049687521272e-05, "loss": 1.4704, "step": 17268 }, { "epoch": 0.6184396655147099, "grad_norm": 1.665738821029663, "learning_rate": 6.711954256962414e-05, "loss": 1.5401, "step": 17269 }, { "epoch": 0.6184754776442781, "grad_norm": 1.3819257020950317, "learning_rate": 6.71085887063997e-05, "loss": 1.7921, "step": 17270 }, { "epoch": 0.6185112897738464, "grad_norm": 1.3936008214950562, "learning_rate": 6.709763528568677e-05, "loss": 1.2616, "step": 17271 }, { "epoch": 0.6185471019034147, "grad_norm": 1.3957215547561646, "learning_rate": 6.708668230763272e-05, "loss": 1.3208, "step": 17272 }, { "epoch": 0.6185829140329829, "grad_norm": 1.659361481666565, "learning_rate": 6.707572977238489e-05, "loss": 1.3095, "step": 17273 }, { "epoch": 0.6186187261625512, "grad_norm": 1.7726492881774902, "learning_rate": 6.706477768009067e-05, "loss": 1.7466, "step": 17274 }, { "epoch": 0.6186545382921196, "grad_norm": 1.3715457916259766, "learning_rate": 6.705382603089737e-05, "loss": 1.6041, "step": 17275 }, { "epoch": 0.6186903504216879, "grad_norm": 1.717397928237915, "learning_rate": 6.704287482495233e-05, "loss": 1.5324, "step": 17276 }, { "epoch": 0.6187261625512561, "grad_norm": 1.2430498600006104, "learning_rate": 6.70319240624029e-05, "loss": 1.3185, "step": 17277 }, { "epoch": 0.6187619746808244, "grad_norm": 1.5718120336532593, "learning_rate": 6.702097374339644e-05, "loss": 1.1736, "step": 17278 }, { "epoch": 0.6187977868103927, "grad_norm": 1.5527795553207397, "learning_rate": 6.701002386808021e-05, "loss": 1.3052, "step": 17279 }, { "epoch": 0.6188335989399609, "grad_norm": 1.8009364604949951, "learning_rate": 6.699907443660156e-05, "loss": 1.3913, "step": 17280 }, { "epoch": 0.6188694110695292, "grad_norm": 3.0369083881378174, "learning_rate": 6.698812544910781e-05, "loss": 1.4698, "step": 17281 }, { "epoch": 0.6189052231990976, "grad_norm": 1.604009747505188, "learning_rate": 6.697717690574623e-05, "loss": 1.4513, "step": 17282 }, { "epoch": 0.6189410353286658, "grad_norm": 1.6419212818145752, "learning_rate": 6.696622880666415e-05, "loss": 1.2407, "step": 17283 }, { "epoch": 0.6189768474582341, "grad_norm": 2.8187286853790283, "learning_rate": 6.695528115200883e-05, "loss": 1.8745, "step": 17284 }, { "epoch": 0.6190126595878024, "grad_norm": 1.8177802562713623, "learning_rate": 6.69443339419276e-05, "loss": 1.3942, "step": 17285 }, { "epoch": 0.6190484717173707, "grad_norm": 1.7879760265350342, "learning_rate": 6.69333871765677e-05, "loss": 1.7262, "step": 17286 }, { "epoch": 0.6190842838469389, "grad_norm": 1.4644464254379272, "learning_rate": 6.692244085607644e-05, "loss": 1.3389, "step": 17287 }, { "epoch": 0.6191200959765072, "grad_norm": 1.9216207265853882, "learning_rate": 6.69114949806011e-05, "loss": 1.7144, "step": 17288 }, { "epoch": 0.6191559081060756, "grad_norm": 1.678239345550537, "learning_rate": 6.690054955028885e-05, "loss": 1.4053, "step": 17289 }, { "epoch": 0.6191917202356438, "grad_norm": 2.289401054382324, "learning_rate": 6.688960456528705e-05, "loss": 1.5856, "step": 17290 }, { "epoch": 0.6192275323652121, "grad_norm": 1.9415135383605957, "learning_rate": 6.687866002574289e-05, "loss": 1.5077, "step": 17291 }, { "epoch": 0.6192633444947804, "grad_norm": 1.520308256149292, "learning_rate": 6.686771593180365e-05, "loss": 1.2985, "step": 17292 }, { "epoch": 0.6192991566243486, "grad_norm": 1.429878830909729, "learning_rate": 6.685677228361654e-05, "loss": 1.5415, "step": 17293 }, { "epoch": 0.6193349687539169, "grad_norm": 1.9562313556671143, "learning_rate": 6.684582908132883e-05, "loss": 1.6137, "step": 17294 }, { "epoch": 0.6193707808834852, "grad_norm": 2.758021831512451, "learning_rate": 6.68348863250877e-05, "loss": 1.4741, "step": 17295 }, { "epoch": 0.6194065930130536, "grad_norm": 1.5317972898483276, "learning_rate": 6.682394401504042e-05, "loss": 1.7757, "step": 17296 }, { "epoch": 0.6194424051426218, "grad_norm": 1.6802877187728882, "learning_rate": 6.681300215133419e-05, "loss": 1.4877, "step": 17297 }, { "epoch": 0.6194782172721901, "grad_norm": 1.4757306575775146, "learning_rate": 6.680206073411616e-05, "loss": 1.478, "step": 17298 }, { "epoch": 0.6195140294017584, "grad_norm": 1.6011658906936646, "learning_rate": 6.679111976353362e-05, "loss": 1.6643, "step": 17299 }, { "epoch": 0.6195498415313266, "grad_norm": 1.3765159845352173, "learning_rate": 6.67801792397337e-05, "loss": 1.3161, "step": 17300 }, { "epoch": 0.6195856536608949, "grad_norm": 1.9646791219711304, "learning_rate": 6.676923916286365e-05, "loss": 1.4321, "step": 17301 }, { "epoch": 0.6196214657904632, "grad_norm": 1.6221250295639038, "learning_rate": 6.675829953307057e-05, "loss": 1.4556, "step": 17302 }, { "epoch": 0.6196572779200316, "grad_norm": 1.8408706188201904, "learning_rate": 6.674736035050173e-05, "loss": 1.3377, "step": 17303 }, { "epoch": 0.6196930900495998, "grad_norm": 1.496396541595459, "learning_rate": 6.673642161530424e-05, "loss": 1.1605, "step": 17304 }, { "epoch": 0.6197289021791681, "grad_norm": 1.4522924423217773, "learning_rate": 6.672548332762533e-05, "loss": 1.586, "step": 17305 }, { "epoch": 0.6197647143087364, "grad_norm": 1.8669017553329468, "learning_rate": 6.671454548761212e-05, "loss": 1.3713, "step": 17306 }, { "epoch": 0.6198005264383046, "grad_norm": 1.4297741651535034, "learning_rate": 6.670360809541171e-05, "loss": 1.4443, "step": 17307 }, { "epoch": 0.6198363385678729, "grad_norm": 1.9161500930786133, "learning_rate": 6.669267115117137e-05, "loss": 1.2736, "step": 17308 }, { "epoch": 0.6198721506974412, "grad_norm": 1.817219614982605, "learning_rate": 6.66817346550381e-05, "loss": 1.4487, "step": 17309 }, { "epoch": 0.6199079628270096, "grad_norm": 1.851157307624817, "learning_rate": 6.66707986071592e-05, "loss": 1.5179, "step": 17310 }, { "epoch": 0.6199437749565778, "grad_norm": 1.622918725013733, "learning_rate": 6.665986300768163e-05, "loss": 1.6099, "step": 17311 }, { "epoch": 0.6199795870861461, "grad_norm": 1.516659140586853, "learning_rate": 6.664892785675267e-05, "loss": 1.4413, "step": 17312 }, { "epoch": 0.6200153992157144, "grad_norm": 1.7160168886184692, "learning_rate": 6.663799315451931e-05, "loss": 1.743, "step": 17313 }, { "epoch": 0.6200512113452826, "grad_norm": 1.5063496828079224, "learning_rate": 6.662705890112876e-05, "loss": 1.5858, "step": 17314 }, { "epoch": 0.6200870234748509, "grad_norm": 1.5053106546401978, "learning_rate": 6.661612509672808e-05, "loss": 1.6044, "step": 17315 }, { "epoch": 0.6201228356044192, "grad_norm": 1.300164818763733, "learning_rate": 6.660519174146433e-05, "loss": 1.6188, "step": 17316 }, { "epoch": 0.6201586477339875, "grad_norm": 3.0625481605529785, "learning_rate": 6.659425883548471e-05, "loss": 1.6178, "step": 17317 }, { "epoch": 0.6201944598635558, "grad_norm": 1.4336274862289429, "learning_rate": 6.658332637893619e-05, "loss": 1.656, "step": 17318 }, { "epoch": 0.6202302719931241, "grad_norm": 2.179532051086426, "learning_rate": 6.657239437196596e-05, "loss": 1.6422, "step": 17319 }, { "epoch": 0.6202660841226924, "grad_norm": 1.5122623443603516, "learning_rate": 6.656146281472098e-05, "loss": 1.5129, "step": 17320 }, { "epoch": 0.6203018962522606, "grad_norm": 1.994327425956726, "learning_rate": 6.655053170734846e-05, "loss": 1.3558, "step": 17321 }, { "epoch": 0.6203377083818289, "grad_norm": 2.061368227005005, "learning_rate": 6.653960104999537e-05, "loss": 1.2273, "step": 17322 }, { "epoch": 0.6203735205113972, "grad_norm": 2.0309760570526123, "learning_rate": 6.652867084280876e-05, "loss": 1.288, "step": 17323 }, { "epoch": 0.6204093326409655, "grad_norm": 1.7593995332717896, "learning_rate": 6.651774108593574e-05, "loss": 1.3319, "step": 17324 }, { "epoch": 0.6204451447705338, "grad_norm": 1.9512978792190552, "learning_rate": 6.650681177952328e-05, "loss": 1.2813, "step": 17325 }, { "epoch": 0.6204809569001021, "grad_norm": 1.8783950805664062, "learning_rate": 6.64958829237185e-05, "loss": 1.346, "step": 17326 }, { "epoch": 0.6205167690296703, "grad_norm": 1.5158321857452393, "learning_rate": 6.648495451866838e-05, "loss": 1.526, "step": 17327 }, { "epoch": 0.6205525811592386, "grad_norm": 1.927087426185608, "learning_rate": 6.647402656451998e-05, "loss": 1.628, "step": 17328 }, { "epoch": 0.6205883932888069, "grad_norm": 1.507460117340088, "learning_rate": 6.646309906142027e-05, "loss": 1.6315, "step": 17329 }, { "epoch": 0.6206242054183752, "grad_norm": 1.6917349100112915, "learning_rate": 6.645217200951636e-05, "loss": 1.7155, "step": 17330 }, { "epoch": 0.6206600175479435, "grad_norm": 1.8586498498916626, "learning_rate": 6.644124540895518e-05, "loss": 1.39, "step": 17331 }, { "epoch": 0.6206958296775118, "grad_norm": 2.0919973850250244, "learning_rate": 6.643031925988375e-05, "loss": 1.6374, "step": 17332 }, { "epoch": 0.6207316418070801, "grad_norm": 1.7580621242523193, "learning_rate": 6.641939356244908e-05, "loss": 1.2822, "step": 17333 }, { "epoch": 0.6207674539366483, "grad_norm": 1.9603501558303833, "learning_rate": 6.640846831679815e-05, "loss": 1.1557, "step": 17334 }, { "epoch": 0.6208032660662166, "grad_norm": 1.6108684539794922, "learning_rate": 6.639754352307794e-05, "loss": 1.203, "step": 17335 }, { "epoch": 0.6208390781957849, "grad_norm": 1.8438420295715332, "learning_rate": 6.638661918143542e-05, "loss": 1.335, "step": 17336 }, { "epoch": 0.6208748903253531, "grad_norm": 1.7761483192443848, "learning_rate": 6.637569529201763e-05, "loss": 1.3621, "step": 17337 }, { "epoch": 0.6209107024549215, "grad_norm": 1.4380193948745728, "learning_rate": 6.636477185497145e-05, "loss": 1.5014, "step": 17338 }, { "epoch": 0.6209465145844898, "grad_norm": 2.0738120079040527, "learning_rate": 6.63538488704439e-05, "loss": 1.3528, "step": 17339 }, { "epoch": 0.6209823267140581, "grad_norm": 1.7343157529830933, "learning_rate": 6.634292633858191e-05, "loss": 1.5682, "step": 17340 }, { "epoch": 0.6210181388436263, "grad_norm": 1.6832574605941772, "learning_rate": 6.633200425953241e-05, "loss": 1.6061, "step": 17341 }, { "epoch": 0.6210539509731946, "grad_norm": 1.3964799642562866, "learning_rate": 6.632108263344238e-05, "loss": 1.2056, "step": 17342 }, { "epoch": 0.6210897631027629, "grad_norm": 1.7358297109603882, "learning_rate": 6.631016146045874e-05, "loss": 1.2508, "step": 17343 }, { "epoch": 0.6211255752323311, "grad_norm": 1.7798261642456055, "learning_rate": 6.629924074072844e-05, "loss": 1.5458, "step": 17344 }, { "epoch": 0.6211613873618995, "grad_norm": 1.7764931917190552, "learning_rate": 6.628832047439835e-05, "loss": 1.4655, "step": 17345 }, { "epoch": 0.6211971994914678, "grad_norm": 2.0410125255584717, "learning_rate": 6.627740066161545e-05, "loss": 1.3842, "step": 17346 }, { "epoch": 0.6212330116210361, "grad_norm": 1.4399596452713013, "learning_rate": 6.62664813025266e-05, "loss": 1.3467, "step": 17347 }, { "epoch": 0.6212688237506043, "grad_norm": 1.5043425559997559, "learning_rate": 6.625556239727875e-05, "loss": 1.5523, "step": 17348 }, { "epoch": 0.6213046358801726, "grad_norm": 1.7739176750183105, "learning_rate": 6.624464394601879e-05, "loss": 1.1175, "step": 17349 }, { "epoch": 0.6213404480097409, "grad_norm": 1.4829490184783936, "learning_rate": 6.623372594889358e-05, "loss": 1.5191, "step": 17350 }, { "epoch": 0.6213762601393091, "grad_norm": 1.4071301221847534, "learning_rate": 6.622280840605005e-05, "loss": 1.3898, "step": 17351 }, { "epoch": 0.6214120722688775, "grad_norm": 3.8605194091796875, "learning_rate": 6.621189131763505e-05, "loss": 1.3449, "step": 17352 }, { "epoch": 0.6214478843984458, "grad_norm": 1.5796082019805908, "learning_rate": 6.620097468379548e-05, "loss": 1.176, "step": 17353 }, { "epoch": 0.621483696528014, "grad_norm": 1.4441759586334229, "learning_rate": 6.619005850467818e-05, "loss": 1.422, "step": 17354 }, { "epoch": 0.6215195086575823, "grad_norm": 1.868899941444397, "learning_rate": 6.617914278043005e-05, "loss": 1.5502, "step": 17355 }, { "epoch": 0.6215553207871506, "grad_norm": 1.4756184816360474, "learning_rate": 6.616822751119792e-05, "loss": 1.4594, "step": 17356 }, { "epoch": 0.6215911329167189, "grad_norm": 1.7463960647583008, "learning_rate": 6.615731269712864e-05, "loss": 1.4894, "step": 17357 }, { "epoch": 0.6216269450462871, "grad_norm": 1.8824423551559448, "learning_rate": 6.614639833836908e-05, "loss": 1.4013, "step": 17358 }, { "epoch": 0.6216627571758555, "grad_norm": 1.6457586288452148, "learning_rate": 6.613548443506605e-05, "loss": 1.6566, "step": 17359 }, { "epoch": 0.6216985693054238, "grad_norm": 2.025904893875122, "learning_rate": 6.612457098736642e-05, "loss": 1.5478, "step": 17360 }, { "epoch": 0.621734381434992, "grad_norm": 1.5645883083343506, "learning_rate": 6.611365799541695e-05, "loss": 1.5564, "step": 17361 }, { "epoch": 0.6217701935645603, "grad_norm": 1.3645058870315552, "learning_rate": 6.610274545936455e-05, "loss": 1.4919, "step": 17362 }, { "epoch": 0.6218060056941286, "grad_norm": 1.9030429124832153, "learning_rate": 6.609183337935594e-05, "loss": 1.7562, "step": 17363 }, { "epoch": 0.6218418178236969, "grad_norm": 1.3259434700012207, "learning_rate": 6.6080921755538e-05, "loss": 1.4657, "step": 17364 }, { "epoch": 0.6218776299532651, "grad_norm": 1.6076582670211792, "learning_rate": 6.607001058805749e-05, "loss": 1.5636, "step": 17365 }, { "epoch": 0.6219134420828335, "grad_norm": 1.4271641969680786, "learning_rate": 6.605909987706125e-05, "loss": 1.3398, "step": 17366 }, { "epoch": 0.6219492542124018, "grad_norm": 1.5322728157043457, "learning_rate": 6.604818962269602e-05, "loss": 1.1012, "step": 17367 }, { "epoch": 0.62198506634197, "grad_norm": 1.7007532119750977, "learning_rate": 6.603727982510859e-05, "loss": 1.3559, "step": 17368 }, { "epoch": 0.6220208784715383, "grad_norm": 1.661720633506775, "learning_rate": 6.602637048444578e-05, "loss": 1.6362, "step": 17369 }, { "epoch": 0.6220566906011066, "grad_norm": 1.6217849254608154, "learning_rate": 6.60154616008543e-05, "loss": 1.1952, "step": 17370 }, { "epoch": 0.6220925027306748, "grad_norm": 1.706960678100586, "learning_rate": 6.600455317448098e-05, "loss": 1.4172, "step": 17371 }, { "epoch": 0.6221283148602431, "grad_norm": 2.1763417720794678, "learning_rate": 6.599364520547251e-05, "loss": 1.653, "step": 17372 }, { "epoch": 0.6221641269898115, "grad_norm": 1.2412214279174805, "learning_rate": 6.598273769397572e-05, "loss": 1.522, "step": 17373 }, { "epoch": 0.6221999391193798, "grad_norm": 2.1275556087493896, "learning_rate": 6.597183064013728e-05, "loss": 1.3012, "step": 17374 }, { "epoch": 0.622235751248948, "grad_norm": 1.7945011854171753, "learning_rate": 6.5960924044104e-05, "loss": 1.3099, "step": 17375 }, { "epoch": 0.6222715633785163, "grad_norm": 1.436598300933838, "learning_rate": 6.595001790602255e-05, "loss": 1.347, "step": 17376 }, { "epoch": 0.6223073755080846, "grad_norm": 1.3291562795639038, "learning_rate": 6.593911222603969e-05, "loss": 1.2779, "step": 17377 }, { "epoch": 0.6223431876376528, "grad_norm": 1.5897414684295654, "learning_rate": 6.592820700430215e-05, "loss": 1.7034, "step": 17378 }, { "epoch": 0.6223789997672211, "grad_norm": 1.8603501319885254, "learning_rate": 6.591730224095663e-05, "loss": 1.4712, "step": 17379 }, { "epoch": 0.6224148118967895, "grad_norm": 2.969459056854248, "learning_rate": 6.590639793614985e-05, "loss": 1.3636, "step": 17380 }, { "epoch": 0.6224506240263578, "grad_norm": 2.0669147968292236, "learning_rate": 6.589549409002851e-05, "loss": 1.5975, "step": 17381 }, { "epoch": 0.622486436155926, "grad_norm": 2.064814329147339, "learning_rate": 6.588459070273931e-05, "loss": 1.3923, "step": 17382 }, { "epoch": 0.6225222482854943, "grad_norm": 1.8036562204360962, "learning_rate": 6.58736877744289e-05, "loss": 1.8272, "step": 17383 }, { "epoch": 0.6225580604150626, "grad_norm": 1.964530348777771, "learning_rate": 6.586278530524405e-05, "loss": 1.5506, "step": 17384 }, { "epoch": 0.6225938725446308, "grad_norm": 1.5007630586624146, "learning_rate": 6.58518832953314e-05, "loss": 1.519, "step": 17385 }, { "epoch": 0.6226296846741991, "grad_norm": 1.749772310256958, "learning_rate": 6.584098174483754e-05, "loss": 1.2044, "step": 17386 }, { "epoch": 0.6226654968037675, "grad_norm": 1.545983910560608, "learning_rate": 6.583008065390925e-05, "loss": 1.4835, "step": 17387 }, { "epoch": 0.6227013089333358, "grad_norm": 1.9022502899169922, "learning_rate": 6.581918002269315e-05, "loss": 1.415, "step": 17388 }, { "epoch": 0.622737121062904, "grad_norm": 1.8049886226654053, "learning_rate": 6.58082798513359e-05, "loss": 1.5487, "step": 17389 }, { "epoch": 0.6227729331924723, "grad_norm": 1.7206758260726929, "learning_rate": 6.579738013998411e-05, "loss": 1.6095, "step": 17390 }, { "epoch": 0.6228087453220406, "grad_norm": 1.4274195432662964, "learning_rate": 6.578648088878449e-05, "loss": 1.5214, "step": 17391 }, { "epoch": 0.6228445574516088, "grad_norm": 2.0439743995666504, "learning_rate": 6.577558209788362e-05, "loss": 1.6615, "step": 17392 }, { "epoch": 0.6228803695811771, "grad_norm": 1.6557294130325317, "learning_rate": 6.576468376742815e-05, "loss": 1.5092, "step": 17393 }, { "epoch": 0.6229161817107455, "grad_norm": 1.3613946437835693, "learning_rate": 6.575378589756472e-05, "loss": 1.4136, "step": 17394 }, { "epoch": 0.6229519938403137, "grad_norm": 1.593880295753479, "learning_rate": 6.574288848843988e-05, "loss": 1.2501, "step": 17395 }, { "epoch": 0.622987805969882, "grad_norm": 2.1387696266174316, "learning_rate": 6.573199154020033e-05, "loss": 1.563, "step": 17396 }, { "epoch": 0.6230236180994503, "grad_norm": 1.517091989517212, "learning_rate": 6.57210950529926e-05, "loss": 1.4276, "step": 17397 }, { "epoch": 0.6230594302290186, "grad_norm": 1.2697229385375977, "learning_rate": 6.571019902696335e-05, "loss": 1.281, "step": 17398 }, { "epoch": 0.6230952423585868, "grad_norm": 1.6039862632751465, "learning_rate": 6.569930346225909e-05, "loss": 1.5502, "step": 17399 }, { "epoch": 0.6231310544881551, "grad_norm": 1.4640159606933594, "learning_rate": 6.56884083590265e-05, "loss": 1.2747, "step": 17400 }, { "epoch": 0.6231668666177235, "grad_norm": 1.5991400480270386, "learning_rate": 6.567751371741209e-05, "loss": 1.6825, "step": 17401 }, { "epoch": 0.6232026787472917, "grad_norm": 1.5831888914108276, "learning_rate": 6.566661953756248e-05, "loss": 1.3705, "step": 17402 }, { "epoch": 0.62323849087686, "grad_norm": 1.7924561500549316, "learning_rate": 6.565572581962425e-05, "loss": 1.5186, "step": 17403 }, { "epoch": 0.6232743030064283, "grad_norm": 2.017235040664673, "learning_rate": 6.564483256374386e-05, "loss": 1.1964, "step": 17404 }, { "epoch": 0.6233101151359965, "grad_norm": 1.7464269399642944, "learning_rate": 6.5633939770068e-05, "loss": 1.7076, "step": 17405 }, { "epoch": 0.6233459272655648, "grad_norm": 1.9727259874343872, "learning_rate": 6.562304743874308e-05, "loss": 1.4771, "step": 17406 }, { "epoch": 0.6233817393951331, "grad_norm": 1.5742576122283936, "learning_rate": 6.561215556991578e-05, "loss": 1.3384, "step": 17407 }, { "epoch": 0.6234175515247015, "grad_norm": 1.320135235786438, "learning_rate": 6.56012641637325e-05, "loss": 1.4939, "step": 17408 }, { "epoch": 0.6234533636542697, "grad_norm": 1.445614218711853, "learning_rate": 6.559037322033991e-05, "loss": 1.5254, "step": 17409 }, { "epoch": 0.623489175783838, "grad_norm": 1.6739253997802734, "learning_rate": 6.55794827398844e-05, "loss": 1.1231, "step": 17410 }, { "epoch": 0.6235249879134063, "grad_norm": 1.5012933015823364, "learning_rate": 6.556859272251261e-05, "loss": 1.7445, "step": 17411 }, { "epoch": 0.6235608000429745, "grad_norm": 1.6745601892471313, "learning_rate": 6.555770316837098e-05, "loss": 1.4139, "step": 17412 }, { "epoch": 0.6235966121725428, "grad_norm": 1.7321993112564087, "learning_rate": 6.554681407760598e-05, "loss": 1.4242, "step": 17413 }, { "epoch": 0.6236324243021111, "grad_norm": 1.5765000581741333, "learning_rate": 6.553592545036421e-05, "loss": 1.5936, "step": 17414 }, { "epoch": 0.6236682364316795, "grad_norm": 2.3662610054016113, "learning_rate": 6.552503728679204e-05, "loss": 1.3233, "step": 17415 }, { "epoch": 0.6237040485612477, "grad_norm": 1.363168478012085, "learning_rate": 6.551414958703611e-05, "loss": 1.3414, "step": 17416 }, { "epoch": 0.623739860690816, "grad_norm": 1.336014986038208, "learning_rate": 6.550326235124274e-05, "loss": 1.4875, "step": 17417 }, { "epoch": 0.6237756728203843, "grad_norm": 1.4121977090835571, "learning_rate": 6.549237557955854e-05, "loss": 1.8272, "step": 17418 }, { "epoch": 0.6238114849499525, "grad_norm": 1.8297215700149536, "learning_rate": 6.54814892721299e-05, "loss": 1.1333, "step": 17419 }, { "epoch": 0.6238472970795208, "grad_norm": 1.8196252584457397, "learning_rate": 6.547060342910324e-05, "loss": 1.7198, "step": 17420 }, { "epoch": 0.6238831092090891, "grad_norm": 2.3765392303466797, "learning_rate": 6.545971805062514e-05, "loss": 1.7214, "step": 17421 }, { "epoch": 0.6239189213386575, "grad_norm": 1.7555404901504517, "learning_rate": 6.544883313684193e-05, "loss": 1.4639, "step": 17422 }, { "epoch": 0.6239547334682257, "grad_norm": 1.5131053924560547, "learning_rate": 6.543794868790015e-05, "loss": 1.2069, "step": 17423 }, { "epoch": 0.623990545597794, "grad_norm": 1.5095545053482056, "learning_rate": 6.542706470394614e-05, "loss": 1.6183, "step": 17424 }, { "epoch": 0.6240263577273623, "grad_norm": 2.3657705783843994, "learning_rate": 6.54161811851264e-05, "loss": 1.8115, "step": 17425 }, { "epoch": 0.6240621698569305, "grad_norm": 1.826514482498169, "learning_rate": 6.540529813158732e-05, "loss": 1.5298, "step": 17426 }, { "epoch": 0.6240979819864988, "grad_norm": 1.7273763418197632, "learning_rate": 6.539441554347537e-05, "loss": 1.4563, "step": 17427 }, { "epoch": 0.6241337941160671, "grad_norm": 2.3398561477661133, "learning_rate": 6.538353342093689e-05, "loss": 1.6562, "step": 17428 }, { "epoch": 0.6241696062456354, "grad_norm": 1.5986614227294922, "learning_rate": 6.537265176411831e-05, "loss": 1.5047, "step": 17429 }, { "epoch": 0.6242054183752037, "grad_norm": 1.5671006441116333, "learning_rate": 6.536177057316605e-05, "loss": 1.452, "step": 17430 }, { "epoch": 0.624241230504772, "grad_norm": 1.5327402353286743, "learning_rate": 6.535088984822647e-05, "loss": 1.5385, "step": 17431 }, { "epoch": 0.6242770426343403, "grad_norm": 1.9305261373519897, "learning_rate": 6.5340009589446e-05, "loss": 1.5302, "step": 17432 }, { "epoch": 0.6243128547639085, "grad_norm": 1.7964725494384766, "learning_rate": 6.532912979697095e-05, "loss": 1.6203, "step": 17433 }, { "epoch": 0.6243486668934768, "grad_norm": 1.4314013719558716, "learning_rate": 6.531825047094778e-05, "loss": 1.6102, "step": 17434 }, { "epoch": 0.6243844790230451, "grad_norm": 1.3836110830307007, "learning_rate": 6.530737161152278e-05, "loss": 1.6876, "step": 17435 }, { "epoch": 0.6244202911526134, "grad_norm": 1.6772618293762207, "learning_rate": 6.529649321884237e-05, "loss": 1.4962, "step": 17436 }, { "epoch": 0.6244561032821817, "grad_norm": 1.6304844617843628, "learning_rate": 6.528561529305289e-05, "loss": 1.2627, "step": 17437 }, { "epoch": 0.62449191541175, "grad_norm": 1.0868332386016846, "learning_rate": 6.527473783430064e-05, "loss": 1.2082, "step": 17438 }, { "epoch": 0.6245277275413182, "grad_norm": 1.9123562574386597, "learning_rate": 6.526386084273202e-05, "loss": 1.535, "step": 17439 }, { "epoch": 0.6245635396708865, "grad_norm": 1.7259448766708374, "learning_rate": 6.525298431849334e-05, "loss": 1.6374, "step": 17440 }, { "epoch": 0.6245993518004548, "grad_norm": 1.6573362350463867, "learning_rate": 6.524210826173094e-05, "loss": 1.4254, "step": 17441 }, { "epoch": 0.624635163930023, "grad_norm": 1.7752695083618164, "learning_rate": 6.523123267259113e-05, "loss": 1.3835, "step": 17442 }, { "epoch": 0.6246709760595914, "grad_norm": 2.4315598011016846, "learning_rate": 6.522035755122024e-05, "loss": 1.5414, "step": 17443 }, { "epoch": 0.6247067881891597, "grad_norm": 1.4300681352615356, "learning_rate": 6.520948289776459e-05, "loss": 1.7243, "step": 17444 }, { "epoch": 0.624742600318728, "grad_norm": 1.8628979921340942, "learning_rate": 6.519860871237046e-05, "loss": 1.4246, "step": 17445 }, { "epoch": 0.6247784124482962, "grad_norm": 2.1132607460021973, "learning_rate": 6.518773499518418e-05, "loss": 1.5807, "step": 17446 }, { "epoch": 0.6248142245778645, "grad_norm": 1.2737668752670288, "learning_rate": 6.517686174635198e-05, "loss": 1.4673, "step": 17447 }, { "epoch": 0.6248500367074328, "grad_norm": 1.5888752937316895, "learning_rate": 6.516598896602022e-05, "loss": 1.4605, "step": 17448 }, { "epoch": 0.624885848837001, "grad_norm": 1.590665578842163, "learning_rate": 6.515511665433513e-05, "loss": 1.5737, "step": 17449 }, { "epoch": 0.6249216609665694, "grad_norm": 2.3191263675689697, "learning_rate": 6.514424481144301e-05, "loss": 1.3844, "step": 17450 }, { "epoch": 0.6249574730961377, "grad_norm": 1.6052703857421875, "learning_rate": 6.513337343749008e-05, "loss": 1.6712, "step": 17451 }, { "epoch": 0.624993285225706, "grad_norm": 1.7228527069091797, "learning_rate": 6.512250253262268e-05, "loss": 1.4053, "step": 17452 }, { "epoch": 0.6250290973552742, "grad_norm": 2.069012403488159, "learning_rate": 6.511163209698701e-05, "loss": 1.5595, "step": 17453 }, { "epoch": 0.6250649094848425, "grad_norm": 1.8847413063049316, "learning_rate": 6.510076213072932e-05, "loss": 1.4639, "step": 17454 }, { "epoch": 0.6251007216144108, "grad_norm": 1.9726908206939697, "learning_rate": 6.508989263399588e-05, "loss": 1.2424, "step": 17455 }, { "epoch": 0.625136533743979, "grad_norm": 1.4060355424880981, "learning_rate": 6.507902360693286e-05, "loss": 1.4809, "step": 17456 }, { "epoch": 0.6251723458735474, "grad_norm": 1.643329381942749, "learning_rate": 6.506815504968657e-05, "loss": 1.4356, "step": 17457 }, { "epoch": 0.6252081580031157, "grad_norm": 1.3762401342391968, "learning_rate": 6.505728696240316e-05, "loss": 1.3249, "step": 17458 }, { "epoch": 0.625243970132684, "grad_norm": 1.967424988746643, "learning_rate": 6.504641934522892e-05, "loss": 1.3091, "step": 17459 }, { "epoch": 0.6252797822622522, "grad_norm": 1.5457974672317505, "learning_rate": 6.503555219830999e-05, "loss": 1.1949, "step": 17460 }, { "epoch": 0.6253155943918205, "grad_norm": 2.106031656265259, "learning_rate": 6.502468552179263e-05, "loss": 1.6295, "step": 17461 }, { "epoch": 0.6253514065213888, "grad_norm": 1.3233288526535034, "learning_rate": 6.501381931582297e-05, "loss": 1.4302, "step": 17462 }, { "epoch": 0.625387218650957, "grad_norm": 2.170650005340576, "learning_rate": 6.500295358054729e-05, "loss": 1.4235, "step": 17463 }, { "epoch": 0.6254230307805254, "grad_norm": 1.5497565269470215, "learning_rate": 6.499208831611172e-05, "loss": 1.2203, "step": 17464 }, { "epoch": 0.6254588429100937, "grad_norm": 2.3494434356689453, "learning_rate": 6.498122352266242e-05, "loss": 1.5483, "step": 17465 }, { "epoch": 0.625494655039662, "grad_norm": 1.4786423444747925, "learning_rate": 6.497035920034561e-05, "loss": 1.4961, "step": 17466 }, { "epoch": 0.6255304671692302, "grad_norm": 1.964049220085144, "learning_rate": 6.49594953493074e-05, "loss": 1.5353, "step": 17467 }, { "epoch": 0.6255662792987985, "grad_norm": 1.518816351890564, "learning_rate": 6.494863196969403e-05, "loss": 1.7526, "step": 17468 }, { "epoch": 0.6256020914283668, "grad_norm": 1.3744655847549438, "learning_rate": 6.493776906165155e-05, "loss": 1.5259, "step": 17469 }, { "epoch": 0.625637903557935, "grad_norm": 1.8796976804733276, "learning_rate": 6.49269066253262e-05, "loss": 1.6298, "step": 17470 }, { "epoch": 0.6256737156875034, "grad_norm": 1.5563455820083618, "learning_rate": 6.491604466086405e-05, "loss": 1.4925, "step": 17471 }, { "epoch": 0.6257095278170717, "grad_norm": 1.5489403009414673, "learning_rate": 6.49051831684113e-05, "loss": 1.2473, "step": 17472 }, { "epoch": 0.62574533994664, "grad_norm": 1.5099996328353882, "learning_rate": 6.489432214811403e-05, "loss": 1.2018, "step": 17473 }, { "epoch": 0.6257811520762082, "grad_norm": 1.9059803485870361, "learning_rate": 6.488346160011835e-05, "loss": 1.2401, "step": 17474 }, { "epoch": 0.6258169642057765, "grad_norm": 1.7727347612380981, "learning_rate": 6.487260152457041e-05, "loss": 1.5937, "step": 17475 }, { "epoch": 0.6258527763353448, "grad_norm": 1.7974789142608643, "learning_rate": 6.486174192161632e-05, "loss": 1.5348, "step": 17476 }, { "epoch": 0.625888588464913, "grad_norm": 1.85459566116333, "learning_rate": 6.485088279140214e-05, "loss": 1.7568, "step": 17477 }, { "epoch": 0.6259244005944814, "grad_norm": 1.5206538438796997, "learning_rate": 6.484002413407401e-05, "loss": 1.3962, "step": 17478 }, { "epoch": 0.6259602127240497, "grad_norm": 1.4857813119888306, "learning_rate": 6.4829165949778e-05, "loss": 1.5086, "step": 17479 }, { "epoch": 0.6259960248536179, "grad_norm": 1.4960447549819946, "learning_rate": 6.481830823866018e-05, "loss": 1.3399, "step": 17480 }, { "epoch": 0.6260318369831862, "grad_norm": 1.2727571725845337, "learning_rate": 6.480745100086668e-05, "loss": 1.4466, "step": 17481 }, { "epoch": 0.6260676491127545, "grad_norm": 1.508490800857544, "learning_rate": 6.479659423654352e-05, "loss": 1.4005, "step": 17482 }, { "epoch": 0.6261034612423227, "grad_norm": 1.6986658573150635, "learning_rate": 6.478573794583673e-05, "loss": 1.4465, "step": 17483 }, { "epoch": 0.626139273371891, "grad_norm": 1.3787263631820679, "learning_rate": 6.477488212889246e-05, "loss": 1.4511, "step": 17484 }, { "epoch": 0.6261750855014594, "grad_norm": 1.7577788829803467, "learning_rate": 6.476402678585669e-05, "loss": 1.1861, "step": 17485 }, { "epoch": 0.6262108976310277, "grad_norm": 1.4000847339630127, "learning_rate": 6.47531719168755e-05, "loss": 1.5713, "step": 17486 }, { "epoch": 0.6262467097605959, "grad_norm": 2.0742688179016113, "learning_rate": 6.474231752209492e-05, "loss": 1.5913, "step": 17487 }, { "epoch": 0.6262825218901642, "grad_norm": 1.7926561832427979, "learning_rate": 6.473146360166098e-05, "loss": 1.533, "step": 17488 }, { "epoch": 0.6263183340197325, "grad_norm": 1.5476453304290771, "learning_rate": 6.472061015571968e-05, "loss": 1.4394, "step": 17489 }, { "epoch": 0.6263541461493007, "grad_norm": 1.240106225013733, "learning_rate": 6.47097571844171e-05, "loss": 1.4792, "step": 17490 }, { "epoch": 0.626389958278869, "grad_norm": 1.3444880247116089, "learning_rate": 6.469890468789922e-05, "loss": 1.5799, "step": 17491 }, { "epoch": 0.6264257704084374, "grad_norm": 1.7917805910110474, "learning_rate": 6.468805266631199e-05, "loss": 1.5301, "step": 17492 }, { "epoch": 0.6264615825380057, "grad_norm": 2.6330838203430176, "learning_rate": 6.467720111980151e-05, "loss": 1.6501, "step": 17493 }, { "epoch": 0.6264973946675739, "grad_norm": 1.631439208984375, "learning_rate": 6.466635004851367e-05, "loss": 1.368, "step": 17494 }, { "epoch": 0.6265332067971422, "grad_norm": 1.8269366025924683, "learning_rate": 6.46554994525946e-05, "loss": 1.6264, "step": 17495 }, { "epoch": 0.6265690189267105, "grad_norm": 1.577916145324707, "learning_rate": 6.46446493321901e-05, "loss": 1.3685, "step": 17496 }, { "epoch": 0.6266048310562787, "grad_norm": 1.450053095817566, "learning_rate": 6.46337996874463e-05, "loss": 1.572, "step": 17497 }, { "epoch": 0.626640643185847, "grad_norm": 1.6219733953475952, "learning_rate": 6.462295051850907e-05, "loss": 1.4317, "step": 17498 }, { "epoch": 0.6266764553154154, "grad_norm": 1.7506242990493774, "learning_rate": 6.461210182552444e-05, "loss": 1.5448, "step": 17499 }, { "epoch": 0.6267122674449837, "grad_norm": 1.4958577156066895, "learning_rate": 6.460125360863835e-05, "loss": 1.1158, "step": 17500 }, { "epoch": 0.6267480795745519, "grad_norm": 1.796212077140808, "learning_rate": 6.459040586799666e-05, "loss": 1.5875, "step": 17501 }, { "epoch": 0.6267838917041202, "grad_norm": 1.9976341724395752, "learning_rate": 6.457955860374545e-05, "loss": 1.4557, "step": 17502 }, { "epoch": 0.6268197038336885, "grad_norm": 1.7986866235733032, "learning_rate": 6.456871181603054e-05, "loss": 1.7378, "step": 17503 }, { "epoch": 0.6268555159632567, "grad_norm": 1.322219967842102, "learning_rate": 6.455786550499796e-05, "loss": 1.3182, "step": 17504 }, { "epoch": 0.626891328092825, "grad_norm": 1.5401942729949951, "learning_rate": 6.454701967079354e-05, "loss": 1.5181, "step": 17505 }, { "epoch": 0.6269271402223934, "grad_norm": 2.3169422149658203, "learning_rate": 6.453617431356327e-05, "loss": 1.8039, "step": 17506 }, { "epoch": 0.6269629523519616, "grad_norm": 1.2413296699523926, "learning_rate": 6.452532943345298e-05, "loss": 1.6122, "step": 17507 }, { "epoch": 0.6269987644815299, "grad_norm": 1.2607512474060059, "learning_rate": 6.451448503060868e-05, "loss": 1.3618, "step": 17508 }, { "epoch": 0.6270345766110982, "grad_norm": 1.9498603343963623, "learning_rate": 6.45036411051762e-05, "loss": 1.2638, "step": 17509 }, { "epoch": 0.6270703887406665, "grad_norm": 1.3453820943832397, "learning_rate": 6.449279765730141e-05, "loss": 1.6058, "step": 17510 }, { "epoch": 0.6271062008702347, "grad_norm": 1.8720459938049316, "learning_rate": 6.448195468713028e-05, "loss": 1.231, "step": 17511 }, { "epoch": 0.627142012999803, "grad_norm": 1.402961254119873, "learning_rate": 6.447111219480857e-05, "loss": 1.5311, "step": 17512 }, { "epoch": 0.6271778251293714, "grad_norm": 1.858162760734558, "learning_rate": 6.446027018048228e-05, "loss": 1.5916, "step": 17513 }, { "epoch": 0.6272136372589396, "grad_norm": 2.5841658115386963, "learning_rate": 6.444942864429713e-05, "loss": 1.3853, "step": 17514 }, { "epoch": 0.6272494493885079, "grad_norm": 1.7156753540039062, "learning_rate": 6.443858758639916e-05, "loss": 1.5192, "step": 17515 }, { "epoch": 0.6272852615180762, "grad_norm": 1.7334517240524292, "learning_rate": 6.442774700693408e-05, "loss": 1.2638, "step": 17516 }, { "epoch": 0.6273210736476444, "grad_norm": 1.4472137689590454, "learning_rate": 6.441690690604775e-05, "loss": 1.9229, "step": 17517 }, { "epoch": 0.6273568857772127, "grad_norm": 1.426712989807129, "learning_rate": 6.440606728388607e-05, "loss": 1.4067, "step": 17518 }, { "epoch": 0.627392697906781, "grad_norm": 1.6888214349746704, "learning_rate": 6.439522814059483e-05, "loss": 1.5726, "step": 17519 }, { "epoch": 0.6274285100363494, "grad_norm": 1.7400039434432983, "learning_rate": 6.438438947631989e-05, "loss": 1.3974, "step": 17520 }, { "epoch": 0.6274643221659176, "grad_norm": 1.7932146787643433, "learning_rate": 6.437355129120701e-05, "loss": 1.3313, "step": 17521 }, { "epoch": 0.6275001342954859, "grad_norm": 1.576096773147583, "learning_rate": 6.436271358540206e-05, "loss": 1.0929, "step": 17522 }, { "epoch": 0.6275359464250542, "grad_norm": 1.7114145755767822, "learning_rate": 6.435187635905082e-05, "loss": 1.0729, "step": 17523 }, { "epoch": 0.6275717585546224, "grad_norm": 1.7404381036758423, "learning_rate": 6.434103961229913e-05, "loss": 1.6533, "step": 17524 }, { "epoch": 0.6276075706841907, "grad_norm": 1.227808952331543, "learning_rate": 6.433020334529275e-05, "loss": 1.4551, "step": 17525 }, { "epoch": 0.627643382813759, "grad_norm": 1.4737696647644043, "learning_rate": 6.431936755817746e-05, "loss": 1.5414, "step": 17526 }, { "epoch": 0.6276791949433274, "grad_norm": 1.8455876111984253, "learning_rate": 6.430853225109908e-05, "loss": 1.9524, "step": 17527 }, { "epoch": 0.6277150070728956, "grad_norm": 1.4521657228469849, "learning_rate": 6.42976974242033e-05, "loss": 1.5804, "step": 17528 }, { "epoch": 0.6277508192024639, "grad_norm": 1.937605381011963, "learning_rate": 6.428686307763601e-05, "loss": 1.1705, "step": 17529 }, { "epoch": 0.6277866313320322, "grad_norm": 1.5884835720062256, "learning_rate": 6.427602921154287e-05, "loss": 1.5303, "step": 17530 }, { "epoch": 0.6278224434616004, "grad_norm": 1.7557952404022217, "learning_rate": 6.426519582606971e-05, "loss": 1.269, "step": 17531 }, { "epoch": 0.6278582555911687, "grad_norm": 1.5802031755447388, "learning_rate": 6.42543629213622e-05, "loss": 1.5768, "step": 17532 }, { "epoch": 0.627894067720737, "grad_norm": 1.7923305034637451, "learning_rate": 6.424353049756618e-05, "loss": 1.411, "step": 17533 }, { "epoch": 0.6279298798503054, "grad_norm": 1.7670484781265259, "learning_rate": 6.423269855482732e-05, "loss": 1.6283, "step": 17534 }, { "epoch": 0.6279656919798736, "grad_norm": 1.5500022172927856, "learning_rate": 6.422186709329134e-05, "loss": 1.5638, "step": 17535 }, { "epoch": 0.6280015041094419, "grad_norm": 1.745823860168457, "learning_rate": 6.421103611310402e-05, "loss": 1.4258, "step": 17536 }, { "epoch": 0.6280373162390102, "grad_norm": 1.6846976280212402, "learning_rate": 6.420020561441101e-05, "loss": 1.3898, "step": 17537 }, { "epoch": 0.6280731283685784, "grad_norm": 1.477970004081726, "learning_rate": 6.41893755973581e-05, "loss": 1.4848, "step": 17538 }, { "epoch": 0.6281089404981467, "grad_norm": 1.6424835920333862, "learning_rate": 6.417854606209091e-05, "loss": 1.3707, "step": 17539 }, { "epoch": 0.628144752627715, "grad_norm": 1.994249939918518, "learning_rate": 6.41677170087552e-05, "loss": 1.7501, "step": 17540 }, { "epoch": 0.6281805647572833, "grad_norm": 2.2836825847625732, "learning_rate": 6.41568884374966e-05, "loss": 1.3445, "step": 17541 }, { "epoch": 0.6282163768868516, "grad_norm": 1.7882944345474243, "learning_rate": 6.414606034846087e-05, "loss": 1.2719, "step": 17542 }, { "epoch": 0.6282521890164199, "grad_norm": 1.532196044921875, "learning_rate": 6.413523274179365e-05, "loss": 1.3419, "step": 17543 }, { "epoch": 0.6282880011459882, "grad_norm": 1.7860722541809082, "learning_rate": 6.412440561764059e-05, "loss": 1.613, "step": 17544 }, { "epoch": 0.6283238132755564, "grad_norm": 1.4192113876342773, "learning_rate": 6.411357897614738e-05, "loss": 1.2385, "step": 17545 }, { "epoch": 0.6283596254051247, "grad_norm": 1.6506589651107788, "learning_rate": 6.410275281745967e-05, "loss": 1.4374, "step": 17546 }, { "epoch": 0.628395437534693, "grad_norm": 1.6427680253982544, "learning_rate": 6.409192714172314e-05, "loss": 1.4561, "step": 17547 }, { "epoch": 0.6284312496642613, "grad_norm": 1.5878175497055054, "learning_rate": 6.408110194908338e-05, "loss": 1.3458, "step": 17548 }, { "epoch": 0.6284670617938296, "grad_norm": 1.747763752937317, "learning_rate": 6.407027723968611e-05, "loss": 1.3013, "step": 17549 }, { "epoch": 0.6285028739233979, "grad_norm": 1.7274367809295654, "learning_rate": 6.405945301367687e-05, "loss": 1.7029, "step": 17550 }, { "epoch": 0.6285386860529661, "grad_norm": 1.8182168006896973, "learning_rate": 6.404862927120134e-05, "loss": 1.3585, "step": 17551 }, { "epoch": 0.6285744981825344, "grad_norm": 1.4165538549423218, "learning_rate": 6.403780601240514e-05, "loss": 1.2135, "step": 17552 }, { "epoch": 0.6286103103121027, "grad_norm": 1.433510661125183, "learning_rate": 6.402698323743385e-05, "loss": 1.313, "step": 17553 }, { "epoch": 0.628646122441671, "grad_norm": 1.786746621131897, "learning_rate": 6.401616094643312e-05, "loss": 1.6242, "step": 17554 }, { "epoch": 0.6286819345712393, "grad_norm": 1.820902705192566, "learning_rate": 6.400533913954851e-05, "loss": 1.5524, "step": 17555 }, { "epoch": 0.6287177467008076, "grad_norm": 1.55777907371521, "learning_rate": 6.399451781692567e-05, "loss": 1.2882, "step": 17556 }, { "epoch": 0.6287535588303759, "grad_norm": 1.7453067302703857, "learning_rate": 6.398369697871011e-05, "loss": 1.6019, "step": 17557 }, { "epoch": 0.6287893709599441, "grad_norm": 1.8225655555725098, "learning_rate": 6.397287662504747e-05, "loss": 1.365, "step": 17558 }, { "epoch": 0.6288251830895124, "grad_norm": 1.931082010269165, "learning_rate": 6.39620567560833e-05, "loss": 1.4394, "step": 17559 }, { "epoch": 0.6288609952190807, "grad_norm": 2.121443271636963, "learning_rate": 6.395123737196316e-05, "loss": 1.3972, "step": 17560 }, { "epoch": 0.628896807348649, "grad_norm": 1.686209797859192, "learning_rate": 6.394041847283263e-05, "loss": 1.4342, "step": 17561 }, { "epoch": 0.6289326194782173, "grad_norm": 1.546787977218628, "learning_rate": 6.392960005883726e-05, "loss": 1.4053, "step": 17562 }, { "epoch": 0.6289684316077856, "grad_norm": 1.6761842966079712, "learning_rate": 6.391878213012258e-05, "loss": 1.4526, "step": 17563 }, { "epoch": 0.6290042437373539, "grad_norm": 2.2791762351989746, "learning_rate": 6.390796468683416e-05, "loss": 1.5521, "step": 17564 }, { "epoch": 0.6290400558669221, "grad_norm": 1.24517822265625, "learning_rate": 6.389714772911751e-05, "loss": 1.4677, "step": 17565 }, { "epoch": 0.6290758679964904, "grad_norm": 1.4214081764221191, "learning_rate": 6.388633125711816e-05, "loss": 1.5366, "step": 17566 }, { "epoch": 0.6291116801260587, "grad_norm": 1.4436924457550049, "learning_rate": 6.387551527098165e-05, "loss": 1.5554, "step": 17567 }, { "epoch": 0.6291474922556269, "grad_norm": 1.6681147813796997, "learning_rate": 6.386469977085348e-05, "loss": 1.1127, "step": 17568 }, { "epoch": 0.6291833043851953, "grad_norm": 2.224066734313965, "learning_rate": 6.385388475687918e-05, "loss": 1.6578, "step": 17569 }, { "epoch": 0.6292191165147636, "grad_norm": 1.7209625244140625, "learning_rate": 6.384307022920424e-05, "loss": 1.6013, "step": 17570 }, { "epoch": 0.6292549286443319, "grad_norm": 1.4986475706100464, "learning_rate": 6.383225618797412e-05, "loss": 1.4255, "step": 17571 }, { "epoch": 0.6292907407739001, "grad_norm": 1.6216480731964111, "learning_rate": 6.382144263333436e-05, "loss": 1.6899, "step": 17572 }, { "epoch": 0.6293265529034684, "grad_norm": 2.151085138320923, "learning_rate": 6.381062956543041e-05, "loss": 1.7799, "step": 17573 }, { "epoch": 0.6293623650330367, "grad_norm": 1.4433550834655762, "learning_rate": 6.379981698440778e-05, "loss": 1.4163, "step": 17574 }, { "epoch": 0.6293981771626049, "grad_norm": 1.6672604084014893, "learning_rate": 6.378900489041188e-05, "loss": 1.4949, "step": 17575 }, { "epoch": 0.6294339892921733, "grad_norm": 2.602228879928589, "learning_rate": 6.377819328358826e-05, "loss": 1.5507, "step": 17576 }, { "epoch": 0.6294698014217416, "grad_norm": 1.5993750095367432, "learning_rate": 6.37673821640823e-05, "loss": 1.6015, "step": 17577 }, { "epoch": 0.6295056135513099, "grad_norm": 1.3872603178024292, "learning_rate": 6.375657153203947e-05, "loss": 1.6021, "step": 17578 }, { "epoch": 0.6295414256808781, "grad_norm": 1.776228904724121, "learning_rate": 6.374576138760525e-05, "loss": 1.4317, "step": 17579 }, { "epoch": 0.6295772378104464, "grad_norm": 1.5714781284332275, "learning_rate": 6.3734951730925e-05, "loss": 1.1074, "step": 17580 }, { "epoch": 0.6296130499400147, "grad_norm": 1.8212718963623047, "learning_rate": 6.372414256214423e-05, "loss": 1.4209, "step": 17581 }, { "epoch": 0.6296488620695829, "grad_norm": 1.9626024961471558, "learning_rate": 6.37133338814083e-05, "loss": 1.3858, "step": 17582 }, { "epoch": 0.6296846741991513, "grad_norm": 1.6976439952850342, "learning_rate": 6.370252568886267e-05, "loss": 1.4777, "step": 17583 }, { "epoch": 0.6297204863287196, "grad_norm": 3.100534677505493, "learning_rate": 6.369171798465274e-05, "loss": 1.5667, "step": 17584 }, { "epoch": 0.6297562984582878, "grad_norm": 1.5115562677383423, "learning_rate": 6.368091076892392e-05, "loss": 1.5159, "step": 17585 }, { "epoch": 0.6297921105878561, "grad_norm": 1.9113214015960693, "learning_rate": 6.367010404182158e-05, "loss": 1.447, "step": 17586 }, { "epoch": 0.6298279227174244, "grad_norm": 1.8984917402267456, "learning_rate": 6.365929780349113e-05, "loss": 1.4439, "step": 17587 }, { "epoch": 0.6298637348469927, "grad_norm": 1.6823961734771729, "learning_rate": 6.3648492054078e-05, "loss": 1.5414, "step": 17588 }, { "epoch": 0.6298995469765609, "grad_norm": 1.8366305828094482, "learning_rate": 6.363768679372744e-05, "loss": 1.303, "step": 17589 }, { "epoch": 0.6299353591061293, "grad_norm": 1.6372846364974976, "learning_rate": 6.362688202258496e-05, "loss": 1.4375, "step": 17590 }, { "epoch": 0.6299711712356976, "grad_norm": 2.158540964126587, "learning_rate": 6.361607774079581e-05, "loss": 1.5602, "step": 17591 }, { "epoch": 0.6300069833652658, "grad_norm": 1.4774373769760132, "learning_rate": 6.360527394850547e-05, "loss": 1.3032, "step": 17592 }, { "epoch": 0.6300427954948341, "grad_norm": 1.7481532096862793, "learning_rate": 6.359447064585915e-05, "loss": 1.3593, "step": 17593 }, { "epoch": 0.6300786076244024, "grad_norm": 1.8624064922332764, "learning_rate": 6.358366783300231e-05, "loss": 1.2172, "step": 17594 }, { "epoch": 0.6301144197539706, "grad_norm": 1.4835082292556763, "learning_rate": 6.357286551008024e-05, "loss": 1.4949, "step": 17595 }, { "epoch": 0.6301502318835389, "grad_norm": 2.123321294784546, "learning_rate": 6.356206367723829e-05, "loss": 1.5935, "step": 17596 }, { "epoch": 0.6301860440131073, "grad_norm": 1.4491807222366333, "learning_rate": 6.355126233462179e-05, "loss": 1.6078, "step": 17597 }, { "epoch": 0.6302218561426756, "grad_norm": 1.65678870677948, "learning_rate": 6.354046148237597e-05, "loss": 1.5985, "step": 17598 }, { "epoch": 0.6302576682722438, "grad_norm": 1.9164150953292847, "learning_rate": 6.352966112064627e-05, "loss": 1.6386, "step": 17599 }, { "epoch": 0.6302934804018121, "grad_norm": 1.5138894319534302, "learning_rate": 6.351886124957789e-05, "loss": 1.5511, "step": 17600 }, { "epoch": 0.6303292925313804, "grad_norm": 1.7643005847930908, "learning_rate": 6.350806186931623e-05, "loss": 1.5784, "step": 17601 }, { "epoch": 0.6303651046609486, "grad_norm": 1.719307780265808, "learning_rate": 6.349726298000647e-05, "loss": 1.5924, "step": 17602 }, { "epoch": 0.6304009167905169, "grad_norm": 1.3820748329162598, "learning_rate": 6.3486464581794e-05, "loss": 1.4025, "step": 17603 }, { "epoch": 0.6304367289200853, "grad_norm": 1.675239086151123, "learning_rate": 6.347566667482401e-05, "loss": 1.4112, "step": 17604 }, { "epoch": 0.6304725410496536, "grad_norm": 1.8763279914855957, "learning_rate": 6.346486925924184e-05, "loss": 1.6343, "step": 17605 }, { "epoch": 0.6305083531792218, "grad_norm": 2.052152633666992, "learning_rate": 6.345407233519273e-05, "loss": 1.4135, "step": 17606 }, { "epoch": 0.6305441653087901, "grad_norm": 1.8708726167678833, "learning_rate": 6.344327590282189e-05, "loss": 1.5575, "step": 17607 }, { "epoch": 0.6305799774383584, "grad_norm": 2.31463360786438, "learning_rate": 6.343247996227469e-05, "loss": 1.5766, "step": 17608 }, { "epoch": 0.6306157895679266, "grad_norm": 1.8864593505859375, "learning_rate": 6.342168451369623e-05, "loss": 1.2702, "step": 17609 }, { "epoch": 0.6306516016974949, "grad_norm": 1.3110343217849731, "learning_rate": 6.341088955723189e-05, "loss": 1.0155, "step": 17610 }, { "epoch": 0.6306874138270633, "grad_norm": 1.729931116104126, "learning_rate": 6.340009509302676e-05, "loss": 1.5527, "step": 17611 }, { "epoch": 0.6307232259566316, "grad_norm": 1.4861717224121094, "learning_rate": 6.338930112122622e-05, "loss": 1.3048, "step": 17612 }, { "epoch": 0.6307590380861998, "grad_norm": 1.8507078886032104, "learning_rate": 6.337850764197539e-05, "loss": 1.3849, "step": 17613 }, { "epoch": 0.6307948502157681, "grad_norm": 1.3783198595046997, "learning_rate": 6.336771465541947e-05, "loss": 1.2361, "step": 17614 }, { "epoch": 0.6308306623453364, "grad_norm": 1.9116185903549194, "learning_rate": 6.33569221617037e-05, "loss": 1.5638, "step": 17615 }, { "epoch": 0.6308664744749046, "grad_norm": 1.7067476511001587, "learning_rate": 6.334613016097328e-05, "loss": 1.3243, "step": 17616 }, { "epoch": 0.6309022866044729, "grad_norm": 1.42518150806427, "learning_rate": 6.333533865337343e-05, "loss": 1.5246, "step": 17617 }, { "epoch": 0.6309380987340413, "grad_norm": 1.7045762538909912, "learning_rate": 6.332454763904925e-05, "loss": 1.5706, "step": 17618 }, { "epoch": 0.6309739108636095, "grad_norm": 1.6341084241867065, "learning_rate": 6.3313757118146e-05, "loss": 1.2484, "step": 17619 }, { "epoch": 0.6310097229931778, "grad_norm": 2.272749423980713, "learning_rate": 6.330296709080881e-05, "loss": 1.3682, "step": 17620 }, { "epoch": 0.6310455351227461, "grad_norm": 2.296658992767334, "learning_rate": 6.329217755718291e-05, "loss": 1.847, "step": 17621 }, { "epoch": 0.6310813472523144, "grad_norm": 1.2976129055023193, "learning_rate": 6.328138851741338e-05, "loss": 1.4858, "step": 17622 }, { "epoch": 0.6311171593818826, "grad_norm": 1.4732706546783447, "learning_rate": 6.32705999716454e-05, "loss": 1.0149, "step": 17623 }, { "epoch": 0.6311529715114509, "grad_norm": 2.097350597381592, "learning_rate": 6.325981192002413e-05, "loss": 1.7887, "step": 17624 }, { "epoch": 0.6311887836410193, "grad_norm": 1.4998894929885864, "learning_rate": 6.324902436269469e-05, "loss": 1.47, "step": 17625 }, { "epoch": 0.6312245957705875, "grad_norm": 2.4181065559387207, "learning_rate": 6.323823729980222e-05, "loss": 1.8616, "step": 17626 }, { "epoch": 0.6312604079001558, "grad_norm": 2.439805507659912, "learning_rate": 6.322745073149185e-05, "loss": 1.6384, "step": 17627 }, { "epoch": 0.6312962200297241, "grad_norm": 1.9213478565216064, "learning_rate": 6.321666465790872e-05, "loss": 1.6752, "step": 17628 }, { "epoch": 0.6313320321592923, "grad_norm": 1.7492610216140747, "learning_rate": 6.320587907919788e-05, "loss": 1.6131, "step": 17629 }, { "epoch": 0.6313678442888606, "grad_norm": 1.5439682006835938, "learning_rate": 6.319509399550452e-05, "loss": 1.3714, "step": 17630 }, { "epoch": 0.6314036564184289, "grad_norm": 1.481459617614746, "learning_rate": 6.318430940697367e-05, "loss": 1.449, "step": 17631 }, { "epoch": 0.6314394685479972, "grad_norm": 1.4452996253967285, "learning_rate": 6.317352531375045e-05, "loss": 1.2496, "step": 17632 }, { "epoch": 0.6314752806775655, "grad_norm": 1.4040240049362183, "learning_rate": 6.316274171597995e-05, "loss": 1.4423, "step": 17633 }, { "epoch": 0.6315110928071338, "grad_norm": 1.6512972116470337, "learning_rate": 6.315195861380722e-05, "loss": 1.3448, "step": 17634 }, { "epoch": 0.6315469049367021, "grad_norm": 1.6966760158538818, "learning_rate": 6.314117600737738e-05, "loss": 1.5408, "step": 17635 }, { "epoch": 0.6315827170662703, "grad_norm": 1.709964394569397, "learning_rate": 6.313039389683546e-05, "loss": 1.3916, "step": 17636 }, { "epoch": 0.6316185291958386, "grad_norm": 1.6564950942993164, "learning_rate": 6.311961228232654e-05, "loss": 1.6976, "step": 17637 }, { "epoch": 0.6316543413254069, "grad_norm": 1.531118631362915, "learning_rate": 6.310883116399567e-05, "loss": 1.3207, "step": 17638 }, { "epoch": 0.6316901534549751, "grad_norm": 2.0177090167999268, "learning_rate": 6.309805054198787e-05, "loss": 1.6174, "step": 17639 }, { "epoch": 0.6317259655845435, "grad_norm": 1.2285786867141724, "learning_rate": 6.308727041644824e-05, "loss": 1.6658, "step": 17640 }, { "epoch": 0.6317617777141118, "grad_norm": 1.6047492027282715, "learning_rate": 6.307649078752174e-05, "loss": 1.6755, "step": 17641 }, { "epoch": 0.6317975898436801, "grad_norm": 2.411289691925049, "learning_rate": 6.306571165535343e-05, "loss": 1.4401, "step": 17642 }, { "epoch": 0.6318334019732483, "grad_norm": 1.399155855178833, "learning_rate": 6.305493302008832e-05, "loss": 1.7149, "step": 17643 }, { "epoch": 0.6318692141028166, "grad_norm": 1.4516044855117798, "learning_rate": 6.304415488187145e-05, "loss": 1.4904, "step": 17644 }, { "epoch": 0.6319050262323849, "grad_norm": 1.6470979452133179, "learning_rate": 6.303337724084779e-05, "loss": 1.6618, "step": 17645 }, { "epoch": 0.6319408383619531, "grad_norm": 1.3379909992218018, "learning_rate": 6.302260009716237e-05, "loss": 1.3781, "step": 17646 }, { "epoch": 0.6319766504915215, "grad_norm": 1.534791111946106, "learning_rate": 6.301182345096017e-05, "loss": 1.34, "step": 17647 }, { "epoch": 0.6320124626210898, "grad_norm": 2.221349000930786, "learning_rate": 6.300104730238616e-05, "loss": 1.535, "step": 17648 }, { "epoch": 0.6320482747506581, "grad_norm": 1.749962329864502, "learning_rate": 6.299027165158536e-05, "loss": 1.4936, "step": 17649 }, { "epoch": 0.6320840868802263, "grad_norm": 1.2860403060913086, "learning_rate": 6.297949649870267e-05, "loss": 1.0702, "step": 17650 }, { "epoch": 0.6321198990097946, "grad_norm": 1.5968939065933228, "learning_rate": 6.296872184388315e-05, "loss": 1.4381, "step": 17651 }, { "epoch": 0.6321557111393629, "grad_norm": 1.518203854560852, "learning_rate": 6.295794768727168e-05, "loss": 1.2938, "step": 17652 }, { "epoch": 0.6321915232689311, "grad_norm": 1.8395830392837524, "learning_rate": 6.294717402901325e-05, "loss": 1.5632, "step": 17653 }, { "epoch": 0.6322273353984995, "grad_norm": 1.8332093954086304, "learning_rate": 6.293640086925279e-05, "loss": 1.6127, "step": 17654 }, { "epoch": 0.6322631475280678, "grad_norm": 1.5718899965286255, "learning_rate": 6.292562820813528e-05, "loss": 1.4328, "step": 17655 }, { "epoch": 0.632298959657636, "grad_norm": 2.0594699382781982, "learning_rate": 6.291485604580559e-05, "loss": 1.6376, "step": 17656 }, { "epoch": 0.6323347717872043, "grad_norm": 1.631384253501892, "learning_rate": 6.290408438240869e-05, "loss": 1.513, "step": 17657 }, { "epoch": 0.6323705839167726, "grad_norm": 1.8818942308425903, "learning_rate": 6.289331321808948e-05, "loss": 1.2976, "step": 17658 }, { "epoch": 0.6324063960463409, "grad_norm": 2.297025680541992, "learning_rate": 6.288254255299286e-05, "loss": 1.5297, "step": 17659 }, { "epoch": 0.6324422081759091, "grad_norm": 1.3660603761672974, "learning_rate": 6.287177238726378e-05, "loss": 1.504, "step": 17660 }, { "epoch": 0.6324780203054775, "grad_norm": 1.9709035158157349, "learning_rate": 6.28610027210471e-05, "loss": 1.7812, "step": 17661 }, { "epoch": 0.6325138324350458, "grad_norm": 1.61428964138031, "learning_rate": 6.285023355448772e-05, "loss": 1.4382, "step": 17662 }, { "epoch": 0.632549644564614, "grad_norm": 2.234609365463257, "learning_rate": 6.283946488773051e-05, "loss": 1.5856, "step": 17663 }, { "epoch": 0.6325854566941823, "grad_norm": 1.6669365167617798, "learning_rate": 6.282869672092039e-05, "loss": 1.4216, "step": 17664 }, { "epoch": 0.6326212688237506, "grad_norm": 1.5173592567443848, "learning_rate": 6.281792905420219e-05, "loss": 1.4237, "step": 17665 }, { "epoch": 0.6326570809533189, "grad_norm": 1.7789220809936523, "learning_rate": 6.280716188772082e-05, "loss": 1.406, "step": 17666 }, { "epoch": 0.6326928930828871, "grad_norm": 2.917492389678955, "learning_rate": 6.279639522162111e-05, "loss": 1.6137, "step": 17667 }, { "epoch": 0.6327287052124555, "grad_norm": 1.5990511178970337, "learning_rate": 6.278562905604788e-05, "loss": 1.3547, "step": 17668 }, { "epoch": 0.6327645173420238, "grad_norm": 2.128406047821045, "learning_rate": 6.277486339114605e-05, "loss": 1.417, "step": 17669 }, { "epoch": 0.632800329471592, "grad_norm": 2.0638668537139893, "learning_rate": 6.276409822706038e-05, "loss": 1.6075, "step": 17670 }, { "epoch": 0.6328361416011603, "grad_norm": 2.864104747772217, "learning_rate": 6.275333356393575e-05, "loss": 1.4648, "step": 17671 }, { "epoch": 0.6328719537307286, "grad_norm": 1.8883485794067383, "learning_rate": 6.274256940191696e-05, "loss": 1.5711, "step": 17672 }, { "epoch": 0.6329077658602968, "grad_norm": 1.6292932033538818, "learning_rate": 6.273180574114887e-05, "loss": 1.7285, "step": 17673 }, { "epoch": 0.6329435779898651, "grad_norm": 1.8495104312896729, "learning_rate": 6.272104258177622e-05, "loss": 1.1279, "step": 17674 }, { "epoch": 0.6329793901194335, "grad_norm": 1.819200038909912, "learning_rate": 6.271027992394389e-05, "loss": 1.9726, "step": 17675 }, { "epoch": 0.6330152022490018, "grad_norm": 1.7758945226669312, "learning_rate": 6.269951776779667e-05, "loss": 1.2936, "step": 17676 }, { "epoch": 0.63305101437857, "grad_norm": 1.482681155204773, "learning_rate": 6.268875611347925e-05, "loss": 1.4099, "step": 17677 }, { "epoch": 0.6330868265081383, "grad_norm": 1.7096611261367798, "learning_rate": 6.267799496113656e-05, "loss": 1.5464, "step": 17678 }, { "epoch": 0.6331226386377066, "grad_norm": 1.5019376277923584, "learning_rate": 6.266723431091323e-05, "loss": 1.4345, "step": 17679 }, { "epoch": 0.6331584507672748, "grad_norm": 2.354271173477173, "learning_rate": 6.265647416295417e-05, "loss": 1.5104, "step": 17680 }, { "epoch": 0.6331942628968431, "grad_norm": 1.3241621255874634, "learning_rate": 6.264571451740405e-05, "loss": 1.5164, "step": 17681 }, { "epoch": 0.6332300750264115, "grad_norm": 1.2999305725097656, "learning_rate": 6.263495537440766e-05, "loss": 1.5146, "step": 17682 }, { "epoch": 0.6332658871559798, "grad_norm": 1.9902713298797607, "learning_rate": 6.262419673410976e-05, "loss": 1.5923, "step": 17683 }, { "epoch": 0.633301699285548, "grad_norm": 1.6341190338134766, "learning_rate": 6.261343859665507e-05, "loss": 1.5331, "step": 17684 }, { "epoch": 0.6333375114151163, "grad_norm": 1.755398154258728, "learning_rate": 6.260268096218838e-05, "loss": 1.4803, "step": 17685 }, { "epoch": 0.6333733235446846, "grad_norm": 1.864999532699585, "learning_rate": 6.259192383085432e-05, "loss": 1.4069, "step": 17686 }, { "epoch": 0.6334091356742528, "grad_norm": 1.3991942405700684, "learning_rate": 6.258116720279773e-05, "loss": 1.1271, "step": 17687 }, { "epoch": 0.6334449478038211, "grad_norm": 1.9088125228881836, "learning_rate": 6.257041107816319e-05, "loss": 1.548, "step": 17688 }, { "epoch": 0.6334807599333895, "grad_norm": 2.0545427799224854, "learning_rate": 6.255965545709556e-05, "loss": 1.441, "step": 17689 }, { "epoch": 0.6335165720629578, "grad_norm": 1.577840805053711, "learning_rate": 6.254890033973942e-05, "loss": 1.6864, "step": 17690 }, { "epoch": 0.633552384192526, "grad_norm": 1.5293114185333252, "learning_rate": 6.253814572623958e-05, "loss": 1.6347, "step": 17691 }, { "epoch": 0.6335881963220943, "grad_norm": 1.3366191387176514, "learning_rate": 6.252739161674059e-05, "loss": 1.344, "step": 17692 }, { "epoch": 0.6336240084516626, "grad_norm": 1.6025129556655884, "learning_rate": 6.251663801138725e-05, "loss": 1.5063, "step": 17693 }, { "epoch": 0.6336598205812308, "grad_norm": 1.4043288230895996, "learning_rate": 6.250588491032421e-05, "loss": 1.5331, "step": 17694 }, { "epoch": 0.6336956327107991, "grad_norm": 1.5216609239578247, "learning_rate": 6.249513231369608e-05, "loss": 1.4133, "step": 17695 }, { "epoch": 0.6337314448403675, "grad_norm": 1.466712236404419, "learning_rate": 6.248438022164763e-05, "loss": 1.3336, "step": 17696 }, { "epoch": 0.6337672569699357, "grad_norm": 2.017925262451172, "learning_rate": 6.247362863432337e-05, "loss": 1.5305, "step": 17697 }, { "epoch": 0.633803069099504, "grad_norm": 1.440697431564331, "learning_rate": 6.246287755186813e-05, "loss": 1.379, "step": 17698 }, { "epoch": 0.6338388812290723, "grad_norm": 1.7415283918380737, "learning_rate": 6.245212697442637e-05, "loss": 1.4176, "step": 17699 }, { "epoch": 0.6338746933586406, "grad_norm": 1.6304659843444824, "learning_rate": 6.244137690214287e-05, "loss": 1.2807, "step": 17700 }, { "epoch": 0.6339105054882088, "grad_norm": 2.0391323566436768, "learning_rate": 6.243062733516211e-05, "loss": 1.5144, "step": 17701 }, { "epoch": 0.6339463176177771, "grad_norm": 1.8402481079101562, "learning_rate": 6.24198782736289e-05, "loss": 1.3738, "step": 17702 }, { "epoch": 0.6339821297473455, "grad_norm": 1.4574590921401978, "learning_rate": 6.240912971768771e-05, "loss": 1.1814, "step": 17703 }, { "epoch": 0.6340179418769137, "grad_norm": 2.0565900802612305, "learning_rate": 6.239838166748318e-05, "loss": 1.086, "step": 17704 }, { "epoch": 0.634053754006482, "grad_norm": 1.5707656145095825, "learning_rate": 6.238763412315993e-05, "loss": 1.2242, "step": 17705 }, { "epoch": 0.6340895661360503, "grad_norm": 2.836914539337158, "learning_rate": 6.237688708486252e-05, "loss": 1.5508, "step": 17706 }, { "epoch": 0.6341253782656185, "grad_norm": 1.3280764818191528, "learning_rate": 6.236614055273562e-05, "loss": 1.2026, "step": 17707 }, { "epoch": 0.6341611903951868, "grad_norm": 2.0606489181518555, "learning_rate": 6.23553945269237e-05, "loss": 1.6827, "step": 17708 }, { "epoch": 0.6341970025247551, "grad_norm": 1.3689779043197632, "learning_rate": 6.234464900757144e-05, "loss": 1.4438, "step": 17709 }, { "epoch": 0.6342328146543235, "grad_norm": 1.415412425994873, "learning_rate": 6.233390399482334e-05, "loss": 1.3191, "step": 17710 }, { "epoch": 0.6342686267838917, "grad_norm": 1.9321517944335938, "learning_rate": 6.232315948882394e-05, "loss": 1.485, "step": 17711 }, { "epoch": 0.63430443891346, "grad_norm": 1.5414435863494873, "learning_rate": 6.231241548971788e-05, "loss": 1.3746, "step": 17712 }, { "epoch": 0.6343402510430283, "grad_norm": 1.7745535373687744, "learning_rate": 6.230167199764962e-05, "loss": 1.5908, "step": 17713 }, { "epoch": 0.6343760631725965, "grad_norm": 1.9899230003356934, "learning_rate": 6.229092901276376e-05, "loss": 1.6602, "step": 17714 }, { "epoch": 0.6344118753021648, "grad_norm": 2.1202757358551025, "learning_rate": 6.228018653520477e-05, "loss": 1.376, "step": 17715 }, { "epoch": 0.6344476874317331, "grad_norm": 1.5492504835128784, "learning_rate": 6.226944456511725e-05, "loss": 1.4702, "step": 17716 }, { "epoch": 0.6344834995613015, "grad_norm": 1.687817931175232, "learning_rate": 6.225870310264567e-05, "loss": 1.3696, "step": 17717 }, { "epoch": 0.6345193116908697, "grad_norm": 2.061065435409546, "learning_rate": 6.224796214793458e-05, "loss": 1.7091, "step": 17718 }, { "epoch": 0.634555123820438, "grad_norm": 1.6443705558776855, "learning_rate": 6.223722170112845e-05, "loss": 1.4229, "step": 17719 }, { "epoch": 0.6345909359500063, "grad_norm": 1.9211294651031494, "learning_rate": 6.222648176237179e-05, "loss": 1.6184, "step": 17720 }, { "epoch": 0.6346267480795745, "grad_norm": 1.7323452234268188, "learning_rate": 6.221574233180907e-05, "loss": 1.5455, "step": 17721 }, { "epoch": 0.6346625602091428, "grad_norm": 1.5966200828552246, "learning_rate": 6.220500340958482e-05, "loss": 1.3132, "step": 17722 }, { "epoch": 0.6346983723387111, "grad_norm": 1.7405643463134766, "learning_rate": 6.219426499584351e-05, "loss": 1.5249, "step": 17723 }, { "epoch": 0.6347341844682794, "grad_norm": 1.7434638738632202, "learning_rate": 6.218352709072957e-05, "loss": 1.6002, "step": 17724 }, { "epoch": 0.6347699965978477, "grad_norm": 1.4588823318481445, "learning_rate": 6.21727896943875e-05, "loss": 1.2939, "step": 17725 }, { "epoch": 0.634805808727416, "grad_norm": 1.711930751800537, "learning_rate": 6.216205280696177e-05, "loss": 1.2147, "step": 17726 }, { "epoch": 0.6348416208569843, "grad_norm": 1.7398806810379028, "learning_rate": 6.21513164285968e-05, "loss": 1.5812, "step": 17727 }, { "epoch": 0.6348774329865525, "grad_norm": 1.6372019052505493, "learning_rate": 6.214058055943706e-05, "loss": 1.6223, "step": 17728 }, { "epoch": 0.6349132451161208, "grad_norm": 1.3632045984268188, "learning_rate": 6.212984519962695e-05, "loss": 1.4353, "step": 17729 }, { "epoch": 0.6349490572456891, "grad_norm": 1.4072738885879517, "learning_rate": 6.211911034931094e-05, "loss": 1.4157, "step": 17730 }, { "epoch": 0.6349848693752574, "grad_norm": 1.8789604902267456, "learning_rate": 6.210837600863342e-05, "loss": 1.3836, "step": 17731 }, { "epoch": 0.6350206815048257, "grad_norm": 1.5280240774154663, "learning_rate": 6.209764217773884e-05, "loss": 1.4962, "step": 17732 }, { "epoch": 0.635056493634394, "grad_norm": 1.6792149543762207, "learning_rate": 6.208690885677158e-05, "loss": 1.3537, "step": 17733 }, { "epoch": 0.6350923057639623, "grad_norm": 2.1413023471832275, "learning_rate": 6.207617604587607e-05, "loss": 1.4218, "step": 17734 }, { "epoch": 0.6351281178935305, "grad_norm": 1.7607442140579224, "learning_rate": 6.20654437451967e-05, "loss": 1.6476, "step": 17735 }, { "epoch": 0.6351639300230988, "grad_norm": 1.53827965259552, "learning_rate": 6.205471195487784e-05, "loss": 1.5853, "step": 17736 }, { "epoch": 0.6351997421526671, "grad_norm": 1.5775997638702393, "learning_rate": 6.204398067506389e-05, "loss": 1.5806, "step": 17737 }, { "epoch": 0.6352355542822354, "grad_norm": 1.4330687522888184, "learning_rate": 6.203324990589922e-05, "loss": 1.4459, "step": 17738 }, { "epoch": 0.6352713664118037, "grad_norm": 1.8977481126785278, "learning_rate": 6.20225196475282e-05, "loss": 1.4269, "step": 17739 }, { "epoch": 0.635307178541372, "grad_norm": 1.7836785316467285, "learning_rate": 6.201178990009518e-05, "loss": 1.547, "step": 17740 }, { "epoch": 0.6353429906709402, "grad_norm": 1.7709898948669434, "learning_rate": 6.200106066374454e-05, "loss": 1.4268, "step": 17741 }, { "epoch": 0.6353788028005085, "grad_norm": 2.1804091930389404, "learning_rate": 6.199033193862059e-05, "loss": 1.4339, "step": 17742 }, { "epoch": 0.6354146149300768, "grad_norm": 1.4796829223632812, "learning_rate": 6.197960372486772e-05, "loss": 1.3443, "step": 17743 }, { "epoch": 0.635450427059645, "grad_norm": 1.9017022848129272, "learning_rate": 6.196887602263022e-05, "loss": 1.4565, "step": 17744 }, { "epoch": 0.6354862391892134, "grad_norm": 1.6325371265411377, "learning_rate": 6.195814883205245e-05, "loss": 1.6163, "step": 17745 }, { "epoch": 0.6355220513187817, "grad_norm": 2.607729196548462, "learning_rate": 6.194742215327873e-05, "loss": 1.211, "step": 17746 }, { "epoch": 0.63555786344835, "grad_norm": 1.6463838815689087, "learning_rate": 6.193669598645334e-05, "loss": 1.5084, "step": 17747 }, { "epoch": 0.6355936755779182, "grad_norm": 1.6740944385528564, "learning_rate": 6.19259703317206e-05, "loss": 1.3549, "step": 17748 }, { "epoch": 0.6356294877074865, "grad_norm": 2.299992561340332, "learning_rate": 6.191524518922482e-05, "loss": 1.4458, "step": 17749 }, { "epoch": 0.6356652998370548, "grad_norm": 1.5784881114959717, "learning_rate": 6.190452055911031e-05, "loss": 1.5335, "step": 17750 }, { "epoch": 0.635701111966623, "grad_norm": 1.283570647239685, "learning_rate": 6.189379644152132e-05, "loss": 1.3831, "step": 17751 }, { "epoch": 0.6357369240961914, "grad_norm": 1.497164249420166, "learning_rate": 6.188307283660216e-05, "loss": 1.491, "step": 17752 }, { "epoch": 0.6357727362257597, "grad_norm": 1.6668617725372314, "learning_rate": 6.187234974449707e-05, "loss": 1.4718, "step": 17753 }, { "epoch": 0.635808548355328, "grad_norm": 1.4830564260482788, "learning_rate": 6.186162716535036e-05, "loss": 1.3756, "step": 17754 }, { "epoch": 0.6358443604848962, "grad_norm": 1.7476052045822144, "learning_rate": 6.185090509930624e-05, "loss": 1.497, "step": 17755 }, { "epoch": 0.6358801726144645, "grad_norm": 1.7061628103256226, "learning_rate": 6.184018354650898e-05, "loss": 1.3756, "step": 17756 }, { "epoch": 0.6359159847440328, "grad_norm": 1.9672883749008179, "learning_rate": 6.182946250710284e-05, "loss": 1.6373, "step": 17757 }, { "epoch": 0.635951796873601, "grad_norm": 1.8433897495269775, "learning_rate": 6.181874198123203e-05, "loss": 1.8807, "step": 17758 }, { "epoch": 0.6359876090031694, "grad_norm": 1.7661159038543701, "learning_rate": 6.18080219690408e-05, "loss": 1.4735, "step": 17759 }, { "epoch": 0.6360234211327377, "grad_norm": 2.0995090007781982, "learning_rate": 6.179730247067336e-05, "loss": 1.3749, "step": 17760 }, { "epoch": 0.636059233262306, "grad_norm": 1.3635008335113525, "learning_rate": 6.178658348627398e-05, "loss": 1.3731, "step": 17761 }, { "epoch": 0.6360950453918742, "grad_norm": 2.132258892059326, "learning_rate": 6.177586501598679e-05, "loss": 1.3157, "step": 17762 }, { "epoch": 0.6361308575214425, "grad_norm": 1.4567443132400513, "learning_rate": 6.176514705995604e-05, "loss": 1.7088, "step": 17763 }, { "epoch": 0.6361666696510108, "grad_norm": 1.368294358253479, "learning_rate": 6.175442961832593e-05, "loss": 1.4436, "step": 17764 }, { "epoch": 0.636202481780579, "grad_norm": 1.7212412357330322, "learning_rate": 6.174371269124061e-05, "loss": 1.5667, "step": 17765 }, { "epoch": 0.6362382939101474, "grad_norm": 1.3152505159378052, "learning_rate": 6.173299627884432e-05, "loss": 1.3987, "step": 17766 }, { "epoch": 0.6362741060397157, "grad_norm": 1.347125768661499, "learning_rate": 6.172228038128118e-05, "loss": 1.3405, "step": 17767 }, { "epoch": 0.636309918169284, "grad_norm": 1.3054901361465454, "learning_rate": 6.171156499869539e-05, "loss": 1.4982, "step": 17768 }, { "epoch": 0.6363457302988522, "grad_norm": 1.41067636013031, "learning_rate": 6.17008501312311e-05, "loss": 1.1198, "step": 17769 }, { "epoch": 0.6363815424284205, "grad_norm": 1.8490687608718872, "learning_rate": 6.169013577903248e-05, "loss": 1.6673, "step": 17770 }, { "epoch": 0.6364173545579888, "grad_norm": 2.312915325164795, "learning_rate": 6.167942194224365e-05, "loss": 1.4044, "step": 17771 }, { "epoch": 0.636453166687557, "grad_norm": 1.7895601987838745, "learning_rate": 6.16687086210088e-05, "loss": 1.634, "step": 17772 }, { "epoch": 0.6364889788171254, "grad_norm": 1.7349082231521606, "learning_rate": 6.165799581547203e-05, "loss": 1.4799, "step": 17773 }, { "epoch": 0.6365247909466937, "grad_norm": 1.9803053140640259, "learning_rate": 6.164728352577743e-05, "loss": 1.4382, "step": 17774 }, { "epoch": 0.6365606030762619, "grad_norm": 2.0456080436706543, "learning_rate": 6.16365717520692e-05, "loss": 1.2846, "step": 17775 }, { "epoch": 0.6365964152058302, "grad_norm": 2.208461046218872, "learning_rate": 6.162586049449136e-05, "loss": 1.3596, "step": 17776 }, { "epoch": 0.6366322273353985, "grad_norm": 1.5197937488555908, "learning_rate": 6.161514975318809e-05, "loss": 1.5683, "step": 17777 }, { "epoch": 0.6366680394649668, "grad_norm": 1.7372349500656128, "learning_rate": 6.160443952830347e-05, "loss": 1.4085, "step": 17778 }, { "epoch": 0.636703851594535, "grad_norm": 1.7026249170303345, "learning_rate": 6.159372981998161e-05, "loss": 1.5112, "step": 17779 }, { "epoch": 0.6367396637241034, "grad_norm": 1.795798897743225, "learning_rate": 6.158302062836654e-05, "loss": 1.5585, "step": 17780 }, { "epoch": 0.6367754758536717, "grad_norm": 1.8992228507995605, "learning_rate": 6.157231195360241e-05, "loss": 1.6058, "step": 17781 }, { "epoch": 0.6368112879832399, "grad_norm": 1.8138514757156372, "learning_rate": 6.156160379583325e-05, "loss": 1.3325, "step": 17782 }, { "epoch": 0.6368471001128082, "grad_norm": 1.941710114479065, "learning_rate": 6.155089615520308e-05, "loss": 1.449, "step": 17783 }, { "epoch": 0.6368829122423765, "grad_norm": 1.922827124595642, "learning_rate": 6.154018903185608e-05, "loss": 1.1852, "step": 17784 }, { "epoch": 0.6369187243719447, "grad_norm": 1.9365390539169312, "learning_rate": 6.152948242593615e-05, "loss": 1.6456, "step": 17785 }, { "epoch": 0.636954536501513, "grad_norm": 1.8548115491867065, "learning_rate": 6.15187763375875e-05, "loss": 1.6585, "step": 17786 }, { "epoch": 0.6369903486310814, "grad_norm": 1.6139572858810425, "learning_rate": 6.150807076695399e-05, "loss": 1.3493, "step": 17787 }, { "epoch": 0.6370261607606497, "grad_norm": 1.368597149848938, "learning_rate": 6.149736571417979e-05, "loss": 1.3896, "step": 17788 }, { "epoch": 0.6370619728902179, "grad_norm": 1.8540292978286743, "learning_rate": 6.148666117940882e-05, "loss": 1.3981, "step": 17789 }, { "epoch": 0.6370977850197862, "grad_norm": 1.3378417491912842, "learning_rate": 6.147595716278519e-05, "loss": 1.302, "step": 17790 }, { "epoch": 0.6371335971493545, "grad_norm": 1.6063618659973145, "learning_rate": 6.146525366445288e-05, "loss": 1.5243, "step": 17791 }, { "epoch": 0.6371694092789227, "grad_norm": 2.179349660873413, "learning_rate": 6.145455068455583e-05, "loss": 1.31, "step": 17792 }, { "epoch": 0.637205221408491, "grad_norm": 1.9033253192901611, "learning_rate": 6.144384822323812e-05, "loss": 1.2234, "step": 17793 }, { "epoch": 0.6372410335380594, "grad_norm": 1.3805716037750244, "learning_rate": 6.143314628064365e-05, "loss": 1.5376, "step": 17794 }, { "epoch": 0.6372768456676277, "grad_norm": 1.3206768035888672, "learning_rate": 6.14224448569165e-05, "loss": 1.4238, "step": 17795 }, { "epoch": 0.6373126577971959, "grad_norm": 1.6055296659469604, "learning_rate": 6.141174395220053e-05, "loss": 1.451, "step": 17796 }, { "epoch": 0.6373484699267642, "grad_norm": 1.5268259048461914, "learning_rate": 6.140104356663984e-05, "loss": 1.3718, "step": 17797 }, { "epoch": 0.6373842820563325, "grad_norm": 1.8566981554031372, "learning_rate": 6.139034370037826e-05, "loss": 1.4693, "step": 17798 }, { "epoch": 0.6374200941859007, "grad_norm": 1.3930617570877075, "learning_rate": 6.137964435355984e-05, "loss": 1.7598, "step": 17799 }, { "epoch": 0.637455906315469, "grad_norm": 2.081630229949951, "learning_rate": 6.13689455263285e-05, "loss": 1.3816, "step": 17800 }, { "epoch": 0.6374917184450374, "grad_norm": 1.8918607234954834, "learning_rate": 6.135824721882815e-05, "loss": 1.487, "step": 17801 }, { "epoch": 0.6375275305746056, "grad_norm": 1.4612147808074951, "learning_rate": 6.134754943120273e-05, "loss": 1.7173, "step": 17802 }, { "epoch": 0.6375633427041739, "grad_norm": 1.6461340188980103, "learning_rate": 6.133685216359615e-05, "loss": 1.3156, "step": 17803 }, { "epoch": 0.6375991548337422, "grad_norm": 2.132502317428589, "learning_rate": 6.132615541615242e-05, "loss": 1.617, "step": 17804 }, { "epoch": 0.6376349669633105, "grad_norm": 1.3095436096191406, "learning_rate": 6.131545918901531e-05, "loss": 1.4064, "step": 17805 }, { "epoch": 0.6376707790928787, "grad_norm": 1.5461561679840088, "learning_rate": 6.130476348232887e-05, "loss": 1.291, "step": 17806 }, { "epoch": 0.637706591222447, "grad_norm": 1.4045078754425049, "learning_rate": 6.12940682962369e-05, "loss": 1.2808, "step": 17807 }, { "epoch": 0.6377424033520154, "grad_norm": 1.6871155500411987, "learning_rate": 6.128337363088327e-05, "loss": 1.5045, "step": 17808 }, { "epoch": 0.6377782154815836, "grad_norm": 1.3531205654144287, "learning_rate": 6.127267948641195e-05, "loss": 1.3646, "step": 17809 }, { "epoch": 0.6378140276111519, "grad_norm": 1.4474282264709473, "learning_rate": 6.126198586296676e-05, "loss": 1.2597, "step": 17810 }, { "epoch": 0.6378498397407202, "grad_norm": 1.5817090272903442, "learning_rate": 6.12512927606916e-05, "loss": 1.4625, "step": 17811 }, { "epoch": 0.6378856518702885, "grad_norm": 1.6557588577270508, "learning_rate": 6.124060017973027e-05, "loss": 1.8598, "step": 17812 }, { "epoch": 0.6379214639998567, "grad_norm": 1.735060453414917, "learning_rate": 6.122990812022671e-05, "loss": 1.4263, "step": 17813 }, { "epoch": 0.637957276129425, "grad_norm": 1.6962659358978271, "learning_rate": 6.12192165823247e-05, "loss": 1.4953, "step": 17814 }, { "epoch": 0.6379930882589934, "grad_norm": 2.5950944423675537, "learning_rate": 6.120852556616811e-05, "loss": 1.6169, "step": 17815 }, { "epoch": 0.6380289003885616, "grad_norm": 1.4274699687957764, "learning_rate": 6.11978350719008e-05, "loss": 1.2505, "step": 17816 }, { "epoch": 0.6380647125181299, "grad_norm": 2.5493216514587402, "learning_rate": 6.118714509966654e-05, "loss": 1.3164, "step": 17817 }, { "epoch": 0.6381005246476982, "grad_norm": 2.081235647201538, "learning_rate": 6.117645564960919e-05, "loss": 1.1562, "step": 17818 }, { "epoch": 0.6381363367772664, "grad_norm": 1.0999113321304321, "learning_rate": 6.116576672187254e-05, "loss": 1.5064, "step": 17819 }, { "epoch": 0.6381721489068347, "grad_norm": 1.29554283618927, "learning_rate": 6.115507831660042e-05, "loss": 1.4769, "step": 17820 }, { "epoch": 0.638207961036403, "grad_norm": 1.3888424634933472, "learning_rate": 6.11443904339366e-05, "loss": 1.8104, "step": 17821 }, { "epoch": 0.6382437731659714, "grad_norm": 1.9146857261657715, "learning_rate": 6.11337030740249e-05, "loss": 1.4991, "step": 17822 }, { "epoch": 0.6382795852955396, "grad_norm": 1.6722118854522705, "learning_rate": 6.112301623700907e-05, "loss": 1.4583, "step": 17823 }, { "epoch": 0.6383153974251079, "grad_norm": 1.50333833694458, "learning_rate": 6.111232992303292e-05, "loss": 1.4124, "step": 17824 }, { "epoch": 0.6383512095546762, "grad_norm": 1.567192792892456, "learning_rate": 6.110164413224025e-05, "loss": 1.3424, "step": 17825 }, { "epoch": 0.6383870216842444, "grad_norm": 2.029167652130127, "learning_rate": 6.109095886477472e-05, "loss": 1.6334, "step": 17826 }, { "epoch": 0.6384228338138127, "grad_norm": 1.5533322095870972, "learning_rate": 6.108027412078018e-05, "loss": 1.5, "step": 17827 }, { "epoch": 0.638458645943381, "grad_norm": 1.8610965013504028, "learning_rate": 6.106958990040033e-05, "loss": 1.6345, "step": 17828 }, { "epoch": 0.6384944580729494, "grad_norm": 1.191743016242981, "learning_rate": 6.105890620377897e-05, "loss": 1.3893, "step": 17829 }, { "epoch": 0.6385302702025176, "grad_norm": 1.9871865510940552, "learning_rate": 6.104822303105974e-05, "loss": 1.4636, "step": 17830 }, { "epoch": 0.6385660823320859, "grad_norm": 1.702633023262024, "learning_rate": 6.103754038238648e-05, "loss": 1.4101, "step": 17831 }, { "epoch": 0.6386018944616542, "grad_norm": 2.4105355739593506, "learning_rate": 6.102685825790282e-05, "loss": 1.3532, "step": 17832 }, { "epoch": 0.6386377065912224, "grad_norm": 1.8922439813613892, "learning_rate": 6.1016176657752534e-05, "loss": 1.4376, "step": 17833 }, { "epoch": 0.6386735187207907, "grad_norm": 2.383431911468506, "learning_rate": 6.100549558207931e-05, "loss": 1.3371, "step": 17834 }, { "epoch": 0.638709330850359, "grad_norm": 2.351727247238159, "learning_rate": 6.099481503102682e-05, "loss": 1.5323, "step": 17835 }, { "epoch": 0.6387451429799273, "grad_norm": 2.596125364303589, "learning_rate": 6.0984135004738784e-05, "loss": 1.7484, "step": 17836 }, { "epoch": 0.6387809551094956, "grad_norm": 1.5746511220932007, "learning_rate": 6.097345550335889e-05, "loss": 1.2177, "step": 17837 }, { "epoch": 0.6388167672390639, "grad_norm": 1.227081298828125, "learning_rate": 6.096277652703082e-05, "loss": 1.6711, "step": 17838 }, { "epoch": 0.6388525793686322, "grad_norm": 2.1082305908203125, "learning_rate": 6.0952098075898214e-05, "loss": 1.598, "step": 17839 }, { "epoch": 0.6388883914982004, "grad_norm": 1.3371353149414062, "learning_rate": 6.0941420150104776e-05, "loss": 1.3421, "step": 17840 }, { "epoch": 0.6389242036277687, "grad_norm": 1.6144399642944336, "learning_rate": 6.0930742749794145e-05, "loss": 1.352, "step": 17841 }, { "epoch": 0.638960015757337, "grad_norm": 1.4856688976287842, "learning_rate": 6.0920065875109986e-05, "loss": 1.5137, "step": 17842 }, { "epoch": 0.6389958278869053, "grad_norm": 2.919574737548828, "learning_rate": 6.0909389526195935e-05, "loss": 1.4141, "step": 17843 }, { "epoch": 0.6390316400164736, "grad_norm": 1.6059921979904175, "learning_rate": 6.0898713703195595e-05, "loss": 1.6238, "step": 17844 }, { "epoch": 0.6390674521460419, "grad_norm": 2.180042028427124, "learning_rate": 6.0888038406252656e-05, "loss": 1.2913, "step": 17845 }, { "epoch": 0.6391032642756102, "grad_norm": 1.7399688959121704, "learning_rate": 6.087736363551069e-05, "loss": 1.5325, "step": 17846 }, { "epoch": 0.6391390764051784, "grad_norm": 2.026592254638672, "learning_rate": 6.086668939111333e-05, "loss": 1.5609, "step": 17847 }, { "epoch": 0.6391748885347467, "grad_norm": 1.4536274671554565, "learning_rate": 6.085601567320418e-05, "loss": 1.2754, "step": 17848 }, { "epoch": 0.639210700664315, "grad_norm": 1.4030091762542725, "learning_rate": 6.084534248192688e-05, "loss": 1.3692, "step": 17849 }, { "epoch": 0.6392465127938833, "grad_norm": 1.6104986667633057, "learning_rate": 6.083466981742496e-05, "loss": 1.6165, "step": 17850 }, { "epoch": 0.6392823249234516, "grad_norm": 1.5701024532318115, "learning_rate": 6.082399767984206e-05, "loss": 1.5008, "step": 17851 }, { "epoch": 0.6393181370530199, "grad_norm": 2.5654051303863525, "learning_rate": 6.081332606932173e-05, "loss": 1.4486, "step": 17852 }, { "epoch": 0.6393539491825881, "grad_norm": 1.540667176246643, "learning_rate": 6.0802654986007534e-05, "loss": 1.5323, "step": 17853 }, { "epoch": 0.6393897613121564, "grad_norm": 1.600110411643982, "learning_rate": 6.079198443004308e-05, "loss": 1.4927, "step": 17854 }, { "epoch": 0.6394255734417247, "grad_norm": 1.3567900657653809, "learning_rate": 6.0781314401571875e-05, "loss": 1.5003, "step": 17855 }, { "epoch": 0.639461385571293, "grad_norm": 3.0247714519500732, "learning_rate": 6.077064490073752e-05, "loss": 1.5352, "step": 17856 }, { "epoch": 0.6394971977008613, "grad_norm": 2.202338695526123, "learning_rate": 6.075997592768352e-05, "loss": 1.8075, "step": 17857 }, { "epoch": 0.6395330098304296, "grad_norm": 1.6735132932662964, "learning_rate": 6.074930748255343e-05, "loss": 1.3196, "step": 17858 }, { "epoch": 0.6395688219599979, "grad_norm": 1.94517982006073, "learning_rate": 6.073863956549077e-05, "loss": 1.506, "step": 17859 }, { "epoch": 0.6396046340895661, "grad_norm": 2.440854787826538, "learning_rate": 6.07279721766391e-05, "loss": 1.6454, "step": 17860 }, { "epoch": 0.6396404462191344, "grad_norm": 1.7555967569351196, "learning_rate": 6.071730531614189e-05, "loss": 1.4305, "step": 17861 }, { "epoch": 0.6396762583487027, "grad_norm": 1.5704050064086914, "learning_rate": 6.070663898414266e-05, "loss": 1.373, "step": 17862 }, { "epoch": 0.6397120704782709, "grad_norm": 2.053732395172119, "learning_rate": 6.069597318078493e-05, "loss": 1.4501, "step": 17863 }, { "epoch": 0.6397478826078393, "grad_norm": 1.6441607475280762, "learning_rate": 6.0685307906212163e-05, "loss": 1.1916, "step": 17864 }, { "epoch": 0.6397836947374076, "grad_norm": 1.6410545110702515, "learning_rate": 6.067464316056789e-05, "loss": 1.2819, "step": 17865 }, { "epoch": 0.6398195068669759, "grad_norm": 1.6242485046386719, "learning_rate": 6.066397894399553e-05, "loss": 1.5297, "step": 17866 }, { "epoch": 0.6398553189965441, "grad_norm": 1.47056245803833, "learning_rate": 6.065331525663864e-05, "loss": 1.6764, "step": 17867 }, { "epoch": 0.6398911311261124, "grad_norm": 1.3741710186004639, "learning_rate": 6.064265209864061e-05, "loss": 1.3427, "step": 17868 }, { "epoch": 0.6399269432556807, "grad_norm": 1.7579123973846436, "learning_rate": 6.063198947014495e-05, "loss": 1.9434, "step": 17869 }, { "epoch": 0.6399627553852489, "grad_norm": 1.678293228149414, "learning_rate": 6.06213273712951e-05, "loss": 1.2601, "step": 17870 }, { "epoch": 0.6399985675148173, "grad_norm": 1.5967705249786377, "learning_rate": 6.061066580223445e-05, "loss": 1.5347, "step": 17871 }, { "epoch": 0.6400343796443856, "grad_norm": 1.7844775915145874, "learning_rate": 6.0600004763106524e-05, "loss": 1.3676, "step": 17872 }, { "epoch": 0.6400701917739539, "grad_norm": 1.7952393293380737, "learning_rate": 6.058934425405467e-05, "loss": 1.6115, "step": 17873 }, { "epoch": 0.6401060039035221, "grad_norm": 1.9247483015060425, "learning_rate": 6.0578684275222376e-05, "loss": 1.2821, "step": 17874 }, { "epoch": 0.6401418160330904, "grad_norm": 1.4412040710449219, "learning_rate": 6.056802482675303e-05, "loss": 1.4646, "step": 17875 }, { "epoch": 0.6401776281626587, "grad_norm": 1.828956127166748, "learning_rate": 6.055736590879007e-05, "loss": 1.856, "step": 17876 }, { "epoch": 0.6402134402922269, "grad_norm": 1.5418649911880493, "learning_rate": 6.0546707521476844e-05, "loss": 1.4854, "step": 17877 }, { "epoch": 0.6402492524217953, "grad_norm": 1.4142812490463257, "learning_rate": 6.0536049664956797e-05, "loss": 1.3271, "step": 17878 }, { "epoch": 0.6402850645513636, "grad_norm": 2.0951547622680664, "learning_rate": 6.052539233937331e-05, "loss": 1.2959, "step": 17879 }, { "epoch": 0.6403208766809318, "grad_norm": 1.931695580482483, "learning_rate": 6.0514735544869706e-05, "loss": 1.6754, "step": 17880 }, { "epoch": 0.6403566888105001, "grad_norm": 2.0580785274505615, "learning_rate": 6.0504079281589454e-05, "loss": 1.4303, "step": 17881 }, { "epoch": 0.6403925009400684, "grad_norm": 1.1910802125930786, "learning_rate": 6.049342354967581e-05, "loss": 1.6201, "step": 17882 }, { "epoch": 0.6404283130696367, "grad_norm": 1.6602959632873535, "learning_rate": 6.0482768349272256e-05, "loss": 1.3145, "step": 17883 }, { "epoch": 0.6404641251992049, "grad_norm": 1.779673457145691, "learning_rate": 6.047211368052201e-05, "loss": 1.4677, "step": 17884 }, { "epoch": 0.6404999373287733, "grad_norm": 1.6596450805664062, "learning_rate": 6.0461459543568566e-05, "loss": 1.5077, "step": 17885 }, { "epoch": 0.6405357494583416, "grad_norm": 3.11470103263855, "learning_rate": 6.04508059385551e-05, "loss": 1.6707, "step": 17886 }, { "epoch": 0.6405715615879098, "grad_norm": 1.251984715461731, "learning_rate": 6.0440152865625076e-05, "loss": 1.5564, "step": 17887 }, { "epoch": 0.6406073737174781, "grad_norm": 2.3035483360290527, "learning_rate": 6.042950032492179e-05, "loss": 1.3278, "step": 17888 }, { "epoch": 0.6406431858470464, "grad_norm": 1.6876386404037476, "learning_rate": 6.041884831658848e-05, "loss": 1.4541, "step": 17889 }, { "epoch": 0.6406789979766147, "grad_norm": 1.6969448328018188, "learning_rate": 6.040819684076856e-05, "loss": 1.7653, "step": 17890 }, { "epoch": 0.6407148101061829, "grad_norm": 1.8200666904449463, "learning_rate": 6.039754589760522e-05, "loss": 1.4405, "step": 17891 }, { "epoch": 0.6407506222357513, "grad_norm": 1.7832893133163452, "learning_rate": 6.038689548724189e-05, "loss": 1.5234, "step": 17892 }, { "epoch": 0.6407864343653196, "grad_norm": 1.9729869365692139, "learning_rate": 6.037624560982171e-05, "loss": 1.4516, "step": 17893 }, { "epoch": 0.6408222464948878, "grad_norm": 1.648978590965271, "learning_rate": 6.03655962654881e-05, "loss": 1.7351, "step": 17894 }, { "epoch": 0.6408580586244561, "grad_norm": 1.5560195446014404, "learning_rate": 6.035494745438421e-05, "loss": 1.4883, "step": 17895 }, { "epoch": 0.6408938707540244, "grad_norm": 1.6403807401657104, "learning_rate": 6.034429917665342e-05, "loss": 1.7517, "step": 17896 }, { "epoch": 0.6409296828835926, "grad_norm": 2.9381628036499023, "learning_rate": 6.033365143243891e-05, "loss": 1.672, "step": 17897 }, { "epoch": 0.6409654950131609, "grad_norm": 1.4989262819290161, "learning_rate": 6.0323004221883936e-05, "loss": 1.1939, "step": 17898 }, { "epoch": 0.6410013071427293, "grad_norm": 1.9374892711639404, "learning_rate": 6.031235754513178e-05, "loss": 1.4359, "step": 17899 }, { "epoch": 0.6410371192722976, "grad_norm": 1.6095060110092163, "learning_rate": 6.030171140232562e-05, "loss": 1.5939, "step": 17900 }, { "epoch": 0.6410729314018658, "grad_norm": 1.5997309684753418, "learning_rate": 6.029106579360879e-05, "loss": 1.1778, "step": 17901 }, { "epoch": 0.6411087435314341, "grad_norm": 2.4153220653533936, "learning_rate": 6.028042071912439e-05, "loss": 1.6604, "step": 17902 }, { "epoch": 0.6411445556610024, "grad_norm": 1.8344680070877075, "learning_rate": 6.026977617901575e-05, "loss": 1.4529, "step": 17903 }, { "epoch": 0.6411803677905706, "grad_norm": 1.8805102109909058, "learning_rate": 6.0259132173426006e-05, "loss": 1.5929, "step": 17904 }, { "epoch": 0.6412161799201389, "grad_norm": 1.684512972831726, "learning_rate": 6.0248488702498353e-05, "loss": 1.1998, "step": 17905 }, { "epoch": 0.6412519920497073, "grad_norm": 2.2713093757629395, "learning_rate": 6.0237845766376035e-05, "loss": 1.6522, "step": 17906 }, { "epoch": 0.6412878041792756, "grad_norm": 1.4675745964050293, "learning_rate": 6.022720336520218e-05, "loss": 1.632, "step": 17907 }, { "epoch": 0.6413236163088438, "grad_norm": 1.731246829032898, "learning_rate": 6.021656149912003e-05, "loss": 1.7003, "step": 17908 }, { "epoch": 0.6413594284384121, "grad_norm": 1.5518698692321777, "learning_rate": 6.020592016827271e-05, "loss": 1.4585, "step": 17909 }, { "epoch": 0.6413952405679804, "grad_norm": 1.5268816947937012, "learning_rate": 6.019527937280342e-05, "loss": 1.244, "step": 17910 }, { "epoch": 0.6414310526975486, "grad_norm": 2.2172024250030518, "learning_rate": 6.018463911285528e-05, "loss": 1.345, "step": 17911 }, { "epoch": 0.6414668648271169, "grad_norm": 2.417259693145752, "learning_rate": 6.0173999388571486e-05, "loss": 1.4226, "step": 17912 }, { "epoch": 0.6415026769566853, "grad_norm": 1.5019891262054443, "learning_rate": 6.0163360200095153e-05, "loss": 1.4445, "step": 17913 }, { "epoch": 0.6415384890862535, "grad_norm": 1.449415683746338, "learning_rate": 6.015272154756941e-05, "loss": 1.5976, "step": 17914 }, { "epoch": 0.6415743012158218, "grad_norm": 1.4184887409210205, "learning_rate": 6.014208343113741e-05, "loss": 1.3103, "step": 17915 }, { "epoch": 0.6416101133453901, "grad_norm": 1.948178768157959, "learning_rate": 6.0131445850942256e-05, "loss": 1.5137, "step": 17916 }, { "epoch": 0.6416459254749584, "grad_norm": 2.6223580837249756, "learning_rate": 6.012080880712708e-05, "loss": 1.2029, "step": 17917 }, { "epoch": 0.6416817376045266, "grad_norm": 1.2477376461029053, "learning_rate": 6.011017229983497e-05, "loss": 1.3365, "step": 17918 }, { "epoch": 0.6417175497340949, "grad_norm": 1.6361243724822998, "learning_rate": 6.0099536329209046e-05, "loss": 1.5658, "step": 17919 }, { "epoch": 0.6417533618636633, "grad_norm": 1.3723866939544678, "learning_rate": 6.008890089539239e-05, "loss": 1.501, "step": 17920 }, { "epoch": 0.6417891739932315, "grad_norm": 1.4834378957748413, "learning_rate": 6.0078265998528105e-05, "loss": 1.2211, "step": 17921 }, { "epoch": 0.6418249861227998, "grad_norm": 1.4249910116195679, "learning_rate": 6.006763163875925e-05, "loss": 1.2466, "step": 17922 }, { "epoch": 0.6418607982523681, "grad_norm": 2.4966237545013428, "learning_rate": 6.005699781622889e-05, "loss": 1.5329, "step": 17923 }, { "epoch": 0.6418966103819364, "grad_norm": 1.3982599973678589, "learning_rate": 6.00463645310801e-05, "loss": 1.3409, "step": 17924 }, { "epoch": 0.6419324225115046, "grad_norm": 1.6006054878234863, "learning_rate": 6.003573178345594e-05, "loss": 1.4297, "step": 17925 }, { "epoch": 0.6419682346410729, "grad_norm": 1.729066014289856, "learning_rate": 6.002509957349948e-05, "loss": 1.3919, "step": 17926 }, { "epoch": 0.6420040467706413, "grad_norm": 1.4634580612182617, "learning_rate": 6.001446790135371e-05, "loss": 1.2687, "step": 17927 }, { "epoch": 0.6420398589002095, "grad_norm": 1.6232527494430542, "learning_rate": 6.0003836767161726e-05, "loss": 1.2855, "step": 17928 }, { "epoch": 0.6420756710297778, "grad_norm": 1.6562719345092773, "learning_rate": 5.999320617106649e-05, "loss": 1.5466, "step": 17929 }, { "epoch": 0.6421114831593461, "grad_norm": 1.4762431383132935, "learning_rate": 5.9982576113211095e-05, "loss": 1.2822, "step": 17930 }, { "epoch": 0.6421472952889143, "grad_norm": 1.5727423429489136, "learning_rate": 5.9971946593738525e-05, "loss": 1.317, "step": 17931 }, { "epoch": 0.6421831074184826, "grad_norm": 1.6262317895889282, "learning_rate": 5.996131761279176e-05, "loss": 1.701, "step": 17932 }, { "epoch": 0.6422189195480509, "grad_norm": 1.5175750255584717, "learning_rate": 5.995068917051383e-05, "loss": 1.3711, "step": 17933 }, { "epoch": 0.6422547316776193, "grad_norm": 1.4356316328048706, "learning_rate": 5.9940061267047695e-05, "loss": 1.4751, "step": 17934 }, { "epoch": 0.6422905438071875, "grad_norm": 2.049543619155884, "learning_rate": 5.992943390253639e-05, "loss": 1.6956, "step": 17935 }, { "epoch": 0.6423263559367558, "grad_norm": 1.311546802520752, "learning_rate": 5.991880707712284e-05, "loss": 1.445, "step": 17936 }, { "epoch": 0.6423621680663241, "grad_norm": 2.0570836067199707, "learning_rate": 5.9908180790950064e-05, "loss": 1.3155, "step": 17937 }, { "epoch": 0.6423979801958923, "grad_norm": 1.6217472553253174, "learning_rate": 5.989755504416098e-05, "loss": 1.4565, "step": 17938 }, { "epoch": 0.6424337923254606, "grad_norm": 1.4959290027618408, "learning_rate": 5.988692983689859e-05, "loss": 1.7333, "step": 17939 }, { "epoch": 0.6424696044550289, "grad_norm": 1.4493623971939087, "learning_rate": 5.98763051693058e-05, "loss": 1.2832, "step": 17940 }, { "epoch": 0.6425054165845973, "grad_norm": 1.8293143510818481, "learning_rate": 5.9865681041525566e-05, "loss": 1.7435, "step": 17941 }, { "epoch": 0.6425412287141655, "grad_norm": 1.8717924356460571, "learning_rate": 5.9855057453700836e-05, "loss": 1.7222, "step": 17942 }, { "epoch": 0.6425770408437338, "grad_norm": 2.5850019454956055, "learning_rate": 5.98444344059745e-05, "loss": 1.4265, "step": 17943 }, { "epoch": 0.6426128529733021, "grad_norm": 1.774579644203186, "learning_rate": 5.9833811898489534e-05, "loss": 1.478, "step": 17944 }, { "epoch": 0.6426486651028703, "grad_norm": 1.7594568729400635, "learning_rate": 5.982318993138879e-05, "loss": 1.6105, "step": 17945 }, { "epoch": 0.6426844772324386, "grad_norm": 1.3756812810897827, "learning_rate": 5.981256850481523e-05, "loss": 1.362, "step": 17946 }, { "epoch": 0.6427202893620069, "grad_norm": 1.6097584962844849, "learning_rate": 5.980194761891169e-05, "loss": 1.5736, "step": 17947 }, { "epoch": 0.6427561014915752, "grad_norm": 1.5040655136108398, "learning_rate": 5.9791327273821105e-05, "loss": 1.577, "step": 17948 }, { "epoch": 0.6427919136211435, "grad_norm": 2.5497679710388184, "learning_rate": 5.978070746968637e-05, "loss": 1.4751, "step": 17949 }, { "epoch": 0.6428277257507118, "grad_norm": 1.8910397291183472, "learning_rate": 5.977008820665031e-05, "loss": 1.6845, "step": 17950 }, { "epoch": 0.6428635378802801, "grad_norm": 1.77099609375, "learning_rate": 5.975946948485583e-05, "loss": 1.5673, "step": 17951 }, { "epoch": 0.6428993500098483, "grad_norm": 2.0042824745178223, "learning_rate": 5.974885130444577e-05, "loss": 1.5555, "step": 17952 }, { "epoch": 0.6429351621394166, "grad_norm": 1.585511565208435, "learning_rate": 5.9738233665563017e-05, "loss": 1.3588, "step": 17953 }, { "epoch": 0.6429709742689849, "grad_norm": 1.3501858711242676, "learning_rate": 5.972761656835038e-05, "loss": 1.2952, "step": 17954 }, { "epoch": 0.6430067863985532, "grad_norm": 1.8139315843582153, "learning_rate": 5.971700001295072e-05, "loss": 1.4315, "step": 17955 }, { "epoch": 0.6430425985281215, "grad_norm": 1.828161597251892, "learning_rate": 5.9706383999506855e-05, "loss": 1.4066, "step": 17956 }, { "epoch": 0.6430784106576898, "grad_norm": 1.7610077857971191, "learning_rate": 5.969576852816163e-05, "loss": 1.7405, "step": 17957 }, { "epoch": 0.643114222787258, "grad_norm": 2.1214122772216797, "learning_rate": 5.968515359905785e-05, "loss": 1.4642, "step": 17958 }, { "epoch": 0.6431500349168263, "grad_norm": 2.081220865249634, "learning_rate": 5.967453921233832e-05, "loss": 1.465, "step": 17959 }, { "epoch": 0.6431858470463946, "grad_norm": 2.047666549682617, "learning_rate": 5.966392536814585e-05, "loss": 1.4785, "step": 17960 }, { "epoch": 0.6432216591759629, "grad_norm": 2.4153099060058594, "learning_rate": 5.9653312066623234e-05, "loss": 1.7967, "step": 17961 }, { "epoch": 0.6432574713055312, "grad_norm": 1.863404393196106, "learning_rate": 5.964269930791326e-05, "loss": 1.5028, "step": 17962 }, { "epoch": 0.6432932834350995, "grad_norm": 1.4706677198410034, "learning_rate": 5.963208709215871e-05, "loss": 1.3202, "step": 17963 }, { "epoch": 0.6433290955646678, "grad_norm": 1.6551233530044556, "learning_rate": 5.962147541950236e-05, "loss": 1.4942, "step": 17964 }, { "epoch": 0.643364907694236, "grad_norm": 2.2377161979675293, "learning_rate": 5.961086429008696e-05, "loss": 1.3441, "step": 17965 }, { "epoch": 0.6434007198238043, "grad_norm": 1.5724660158157349, "learning_rate": 5.960025370405531e-05, "loss": 1.3756, "step": 17966 }, { "epoch": 0.6434365319533726, "grad_norm": 1.5656579732894897, "learning_rate": 5.958964366155014e-05, "loss": 1.4846, "step": 17967 }, { "epoch": 0.6434723440829409, "grad_norm": 2.0878260135650635, "learning_rate": 5.957903416271414e-05, "loss": 1.6058, "step": 17968 }, { "epoch": 0.6435081562125092, "grad_norm": 1.9619213342666626, "learning_rate": 5.9568425207690146e-05, "loss": 1.5998, "step": 17969 }, { "epoch": 0.6435439683420775, "grad_norm": 1.3761613368988037, "learning_rate": 5.9557816796620804e-05, "loss": 1.6619, "step": 17970 }, { "epoch": 0.6435797804716458, "grad_norm": 1.948231816291809, "learning_rate": 5.954720892964889e-05, "loss": 1.3027, "step": 17971 }, { "epoch": 0.643615592601214, "grad_norm": 1.6816844940185547, "learning_rate": 5.9536601606917075e-05, "loss": 1.4994, "step": 17972 }, { "epoch": 0.6436514047307823, "grad_norm": 1.75279700756073, "learning_rate": 5.952599482856811e-05, "loss": 1.5466, "step": 17973 }, { "epoch": 0.6436872168603506, "grad_norm": 1.7188254594802856, "learning_rate": 5.951538859474467e-05, "loss": 1.5254, "step": 17974 }, { "epoch": 0.6437230289899188, "grad_norm": 1.8281207084655762, "learning_rate": 5.950478290558947e-05, "loss": 1.5956, "step": 17975 }, { "epoch": 0.6437588411194872, "grad_norm": 1.9698082208633423, "learning_rate": 5.9494177761245194e-05, "loss": 1.5074, "step": 17976 }, { "epoch": 0.6437946532490555, "grad_norm": 1.3871486186981201, "learning_rate": 5.9483573161854464e-05, "loss": 1.5655, "step": 17977 }, { "epoch": 0.6438304653786238, "grad_norm": 1.6179381608963013, "learning_rate": 5.947296910756004e-05, "loss": 1.2935, "step": 17978 }, { "epoch": 0.643866277508192, "grad_norm": 1.5491410493850708, "learning_rate": 5.946236559850449e-05, "loss": 1.4044, "step": 17979 }, { "epoch": 0.6439020896377603, "grad_norm": 2.000506639480591, "learning_rate": 5.945176263483057e-05, "loss": 1.536, "step": 17980 }, { "epoch": 0.6439379017673286, "grad_norm": 1.5044797658920288, "learning_rate": 5.9441160216680826e-05, "loss": 1.6398, "step": 17981 }, { "epoch": 0.6439737138968968, "grad_norm": 2.1576712131500244, "learning_rate": 5.9430558344198016e-05, "loss": 1.4296, "step": 17982 }, { "epoch": 0.6440095260264652, "grad_norm": 1.4655178785324097, "learning_rate": 5.941995701752465e-05, "loss": 1.4955, "step": 17983 }, { "epoch": 0.6440453381560335, "grad_norm": 1.7146788835525513, "learning_rate": 5.9409356236803456e-05, "loss": 1.6319, "step": 17984 }, { "epoch": 0.6440811502856018, "grad_norm": 2.0093584060668945, "learning_rate": 5.9398756002177035e-05, "loss": 1.4727, "step": 17985 }, { "epoch": 0.64411696241517, "grad_norm": 1.5980204343795776, "learning_rate": 5.938815631378794e-05, "loss": 1.4705, "step": 17986 }, { "epoch": 0.6441527745447383, "grad_norm": 1.7558846473693848, "learning_rate": 5.937755717177885e-05, "loss": 1.4372, "step": 17987 }, { "epoch": 0.6441885866743066, "grad_norm": 2.151200294494629, "learning_rate": 5.9366958576292284e-05, "loss": 1.5712, "step": 17988 }, { "epoch": 0.6442243988038748, "grad_norm": 1.6231688261032104, "learning_rate": 5.9356360527470934e-05, "loss": 1.5356, "step": 17989 }, { "epoch": 0.6442602109334432, "grad_norm": 2.068516969680786, "learning_rate": 5.9345763025457266e-05, "loss": 1.7413, "step": 17990 }, { "epoch": 0.6442960230630115, "grad_norm": 1.3470734357833862, "learning_rate": 5.9335166070393975e-05, "loss": 1.6185, "step": 17991 }, { "epoch": 0.6443318351925797, "grad_norm": 1.7504842281341553, "learning_rate": 5.93245696624235e-05, "loss": 1.5977, "step": 17992 }, { "epoch": 0.644367647322148, "grad_norm": 1.6177539825439453, "learning_rate": 5.931397380168855e-05, "loss": 1.3695, "step": 17993 }, { "epoch": 0.6444034594517163, "grad_norm": 2.0384392738342285, "learning_rate": 5.9303378488331576e-05, "loss": 1.5366, "step": 17994 }, { "epoch": 0.6444392715812846, "grad_norm": 2.010671377182007, "learning_rate": 5.9292783722495126e-05, "loss": 1.5376, "step": 17995 }, { "epoch": 0.6444750837108528, "grad_norm": 1.5663658380508423, "learning_rate": 5.928218950432179e-05, "loss": 1.3194, "step": 17996 }, { "epoch": 0.6445108958404212, "grad_norm": 1.3770331144332886, "learning_rate": 5.927159583395403e-05, "loss": 1.4241, "step": 17997 }, { "epoch": 0.6445467079699895, "grad_norm": 1.6570316553115845, "learning_rate": 5.926100271153446e-05, "loss": 1.4073, "step": 17998 }, { "epoch": 0.6445825200995577, "grad_norm": 1.6265000104904175, "learning_rate": 5.9250410137205506e-05, "loss": 1.485, "step": 17999 }, { "epoch": 0.644618332229126, "grad_norm": 1.9524234533309937, "learning_rate": 5.923981811110977e-05, "loss": 1.7584, "step": 18000 }, { "epoch": 0.6446541443586943, "grad_norm": 2.180847406387329, "learning_rate": 5.922922663338969e-05, "loss": 1.6183, "step": 18001 }, { "epoch": 0.6446899564882625, "grad_norm": 1.5719871520996094, "learning_rate": 5.921863570418775e-05, "loss": 1.5754, "step": 18002 }, { "epoch": 0.6447257686178308, "grad_norm": 1.1843252182006836, "learning_rate": 5.9208045323646474e-05, "loss": 1.3016, "step": 18003 }, { "epoch": 0.6447615807473992, "grad_norm": 2.256765604019165, "learning_rate": 5.919745549190834e-05, "loss": 1.483, "step": 18004 }, { "epoch": 0.6447973928769675, "grad_norm": 1.5203441381454468, "learning_rate": 5.91868662091158e-05, "loss": 1.4898, "step": 18005 }, { "epoch": 0.6448332050065357, "grad_norm": 1.4746381044387817, "learning_rate": 5.9176277475411324e-05, "loss": 1.4107, "step": 18006 }, { "epoch": 0.644869017136104, "grad_norm": 1.7098374366760254, "learning_rate": 5.91656892909374e-05, "loss": 1.1795, "step": 18007 }, { "epoch": 0.6449048292656723, "grad_norm": 1.5304396152496338, "learning_rate": 5.915510165583642e-05, "loss": 1.2104, "step": 18008 }, { "epoch": 0.6449406413952405, "grad_norm": 1.4094974994659424, "learning_rate": 5.91445145702509e-05, "loss": 1.361, "step": 18009 }, { "epoch": 0.6449764535248088, "grad_norm": 1.5532375574111938, "learning_rate": 5.9133928034323215e-05, "loss": 1.5194, "step": 18010 }, { "epoch": 0.6450122656543772, "grad_norm": 2.0995876789093018, "learning_rate": 5.912334204819581e-05, "loss": 1.6518, "step": 18011 }, { "epoch": 0.6450480777839455, "grad_norm": 1.8141247034072876, "learning_rate": 5.911275661201112e-05, "loss": 1.5584, "step": 18012 }, { "epoch": 0.6450838899135137, "grad_norm": 1.501083254814148, "learning_rate": 5.910217172591155e-05, "loss": 1.437, "step": 18013 }, { "epoch": 0.645119702043082, "grad_norm": 1.8988889455795288, "learning_rate": 5.90915873900395e-05, "loss": 1.2275, "step": 18014 }, { "epoch": 0.6451555141726503, "grad_norm": 1.5784447193145752, "learning_rate": 5.908100360453737e-05, "loss": 1.5859, "step": 18015 }, { "epoch": 0.6451913263022185, "grad_norm": 1.3819835186004639, "learning_rate": 5.9070420369547564e-05, "loss": 1.3364, "step": 18016 }, { "epoch": 0.6452271384317868, "grad_norm": 1.9142459630966187, "learning_rate": 5.905983768521244e-05, "loss": 1.2799, "step": 18017 }, { "epoch": 0.6452629505613552, "grad_norm": 1.3652900457382202, "learning_rate": 5.904925555167442e-05, "loss": 1.206, "step": 18018 }, { "epoch": 0.6452987626909235, "grad_norm": 1.5359416007995605, "learning_rate": 5.903867396907583e-05, "loss": 1.1311, "step": 18019 }, { "epoch": 0.6453345748204917, "grad_norm": 1.9005250930786133, "learning_rate": 5.9028092937559034e-05, "loss": 1.5002, "step": 18020 }, { "epoch": 0.64537038695006, "grad_norm": 1.9126530885696411, "learning_rate": 5.901751245726641e-05, "loss": 1.7099, "step": 18021 }, { "epoch": 0.6454061990796283, "grad_norm": 1.833815574645996, "learning_rate": 5.9006932528340284e-05, "loss": 1.4293, "step": 18022 }, { "epoch": 0.6454420112091965, "grad_norm": 3.0067315101623535, "learning_rate": 5.899635315092301e-05, "loss": 1.5751, "step": 18023 }, { "epoch": 0.6454778233387648, "grad_norm": 1.957001805305481, "learning_rate": 5.89857743251569e-05, "loss": 1.3815, "step": 18024 }, { "epoch": 0.6455136354683331, "grad_norm": 1.573081135749817, "learning_rate": 5.897519605118431e-05, "loss": 1.3973, "step": 18025 }, { "epoch": 0.6455494475979014, "grad_norm": 1.9092211723327637, "learning_rate": 5.896461832914753e-05, "loss": 1.6152, "step": 18026 }, { "epoch": 0.6455852597274697, "grad_norm": 1.6830337047576904, "learning_rate": 5.8954041159188876e-05, "loss": 1.8081, "step": 18027 }, { "epoch": 0.645621071857038, "grad_norm": 1.7186933755874634, "learning_rate": 5.894346454145068e-05, "loss": 1.4044, "step": 18028 }, { "epoch": 0.6456568839866063, "grad_norm": 1.6428803205490112, "learning_rate": 5.8932888476075166e-05, "loss": 1.5604, "step": 18029 }, { "epoch": 0.6456926961161745, "grad_norm": 2.214829683303833, "learning_rate": 5.89223129632047e-05, "loss": 1.3191, "step": 18030 }, { "epoch": 0.6457285082457428, "grad_norm": 1.8516603708267212, "learning_rate": 5.8911738002981506e-05, "loss": 1.5621, "step": 18031 }, { "epoch": 0.6457643203753111, "grad_norm": 2.198071002960205, "learning_rate": 5.890116359554789e-05, "loss": 1.398, "step": 18032 }, { "epoch": 0.6458001325048794, "grad_norm": 1.7119433879852295, "learning_rate": 5.8890589741046084e-05, "loss": 1.6987, "step": 18033 }, { "epoch": 0.6458359446344477, "grad_norm": 1.5865453481674194, "learning_rate": 5.888001643961839e-05, "loss": 1.4178, "step": 18034 }, { "epoch": 0.645871756764016, "grad_norm": 1.7404098510742188, "learning_rate": 5.886944369140701e-05, "loss": 1.8102, "step": 18035 }, { "epoch": 0.6459075688935842, "grad_norm": 1.6258031129837036, "learning_rate": 5.8858871496554235e-05, "loss": 1.4598, "step": 18036 }, { "epoch": 0.6459433810231525, "grad_norm": 2.006159543991089, "learning_rate": 5.884829985520227e-05, "loss": 1.6035, "step": 18037 }, { "epoch": 0.6459791931527208, "grad_norm": 1.9466265439987183, "learning_rate": 5.883772876749334e-05, "loss": 1.3743, "step": 18038 }, { "epoch": 0.6460150052822891, "grad_norm": 1.9104527235031128, "learning_rate": 5.882715823356968e-05, "loss": 1.4299, "step": 18039 }, { "epoch": 0.6460508174118574, "grad_norm": 2.058929920196533, "learning_rate": 5.881658825357348e-05, "loss": 1.7316, "step": 18040 }, { "epoch": 0.6460866295414257, "grad_norm": 1.6255000829696655, "learning_rate": 5.8806018827646994e-05, "loss": 1.4241, "step": 18041 }, { "epoch": 0.646122441670994, "grad_norm": 1.745592713356018, "learning_rate": 5.879544995593236e-05, "loss": 1.6278, "step": 18042 }, { "epoch": 0.6461582538005622, "grad_norm": 1.3595131635665894, "learning_rate": 5.878488163857181e-05, "loss": 1.262, "step": 18043 }, { "epoch": 0.6461940659301305, "grad_norm": 1.6638784408569336, "learning_rate": 5.87743138757075e-05, "loss": 1.5087, "step": 18044 }, { "epoch": 0.6462298780596988, "grad_norm": 1.6791967153549194, "learning_rate": 5.8763746667481634e-05, "loss": 1.3979, "step": 18045 }, { "epoch": 0.646265690189267, "grad_norm": 1.6635165214538574, "learning_rate": 5.8753180014036377e-05, "loss": 1.4787, "step": 18046 }, { "epoch": 0.6463015023188354, "grad_norm": 1.7006889581680298, "learning_rate": 5.874261391551386e-05, "loss": 1.5113, "step": 18047 }, { "epoch": 0.6463373144484037, "grad_norm": 1.8248895406723022, "learning_rate": 5.873204837205626e-05, "loss": 1.518, "step": 18048 }, { "epoch": 0.646373126577972, "grad_norm": 1.6426078081130981, "learning_rate": 5.8721483383805696e-05, "loss": 1.4435, "step": 18049 }, { "epoch": 0.6464089387075402, "grad_norm": 1.9970711469650269, "learning_rate": 5.871091895090437e-05, "loss": 1.2891, "step": 18050 }, { "epoch": 0.6464447508371085, "grad_norm": 2.244947910308838, "learning_rate": 5.870035507349434e-05, "loss": 1.8148, "step": 18051 }, { "epoch": 0.6464805629666768, "grad_norm": 1.321719765663147, "learning_rate": 5.8689791751717757e-05, "loss": 1.286, "step": 18052 }, { "epoch": 0.646516375096245, "grad_norm": 1.4628890752792358, "learning_rate": 5.867922898571675e-05, "loss": 1.6739, "step": 18053 }, { "epoch": 0.6465521872258134, "grad_norm": 2.1549174785614014, "learning_rate": 5.8668666775633426e-05, "loss": 1.8747, "step": 18054 }, { "epoch": 0.6465879993553817, "grad_norm": 1.6913148164749146, "learning_rate": 5.8658105121609896e-05, "loss": 1.4313, "step": 18055 }, { "epoch": 0.64662381148495, "grad_norm": 1.9767898321151733, "learning_rate": 5.864754402378818e-05, "loss": 1.5655, "step": 18056 }, { "epoch": 0.6466596236145182, "grad_norm": 1.8027931451797485, "learning_rate": 5.863698348231045e-05, "loss": 1.4809, "step": 18057 }, { "epoch": 0.6466954357440865, "grad_norm": 1.5340521335601807, "learning_rate": 5.862642349731874e-05, "loss": 1.4937, "step": 18058 }, { "epoch": 0.6467312478736548, "grad_norm": 1.65854811668396, "learning_rate": 5.861586406895514e-05, "loss": 1.2827, "step": 18059 }, { "epoch": 0.646767060003223, "grad_norm": 1.2305033206939697, "learning_rate": 5.8605305197361705e-05, "loss": 1.4615, "step": 18060 }, { "epoch": 0.6468028721327914, "grad_norm": 1.3957847356796265, "learning_rate": 5.859474688268051e-05, "loss": 1.4643, "step": 18061 }, { "epoch": 0.6468386842623597, "grad_norm": 1.881250262260437, "learning_rate": 5.8584189125053556e-05, "loss": 1.2637, "step": 18062 }, { "epoch": 0.646874496391928, "grad_norm": 1.5098015069961548, "learning_rate": 5.857363192462294e-05, "loss": 1.5307, "step": 18063 }, { "epoch": 0.6469103085214962, "grad_norm": 1.6502503156661987, "learning_rate": 5.8563075281530685e-05, "loss": 1.1335, "step": 18064 }, { "epoch": 0.6469461206510645, "grad_norm": 1.6741015911102295, "learning_rate": 5.855251919591875e-05, "loss": 1.5119, "step": 18065 }, { "epoch": 0.6469819327806328, "grad_norm": 1.6774035692214966, "learning_rate": 5.8541963667929276e-05, "loss": 1.4242, "step": 18066 }, { "epoch": 0.647017744910201, "grad_norm": 1.69266939163208, "learning_rate": 5.8531408697704124e-05, "loss": 1.2887, "step": 18067 }, { "epoch": 0.6470535570397694, "grad_norm": 1.6085079908370972, "learning_rate": 5.852085428538545e-05, "loss": 1.5291, "step": 18068 }, { "epoch": 0.6470893691693377, "grad_norm": 1.8055675029754639, "learning_rate": 5.851030043111512e-05, "loss": 1.6574, "step": 18069 }, { "epoch": 0.647125181298906, "grad_norm": 1.7611117362976074, "learning_rate": 5.849974713503521e-05, "loss": 1.5388, "step": 18070 }, { "epoch": 0.6471609934284742, "grad_norm": 1.4818743467330933, "learning_rate": 5.848919439728765e-05, "loss": 1.2733, "step": 18071 }, { "epoch": 0.6471968055580425, "grad_norm": 1.5272941589355469, "learning_rate": 5.847864221801446e-05, "loss": 1.5622, "step": 18072 }, { "epoch": 0.6472326176876108, "grad_norm": 1.7738280296325684, "learning_rate": 5.8468090597357595e-05, "loss": 1.6174, "step": 18073 }, { "epoch": 0.647268429817179, "grad_norm": 2.0849111080169678, "learning_rate": 5.845753953545894e-05, "loss": 1.4549, "step": 18074 }, { "epoch": 0.6473042419467474, "grad_norm": 2.5184481143951416, "learning_rate": 5.8446989032460574e-05, "loss": 1.7102, "step": 18075 }, { "epoch": 0.6473400540763157, "grad_norm": 1.650846004486084, "learning_rate": 5.84364390885043e-05, "loss": 1.2554, "step": 18076 }, { "epoch": 0.6473758662058839, "grad_norm": 1.4134989976882935, "learning_rate": 5.8425889703732193e-05, "loss": 1.3435, "step": 18077 }, { "epoch": 0.6474116783354522, "grad_norm": 1.4370696544647217, "learning_rate": 5.841534087828604e-05, "loss": 1.2767, "step": 18078 }, { "epoch": 0.6474474904650205, "grad_norm": 1.943215012550354, "learning_rate": 5.840479261230791e-05, "loss": 1.6713, "step": 18079 }, { "epoch": 0.6474833025945887, "grad_norm": 1.4532264471054077, "learning_rate": 5.839424490593957e-05, "loss": 1.5316, "step": 18080 }, { "epoch": 0.647519114724157, "grad_norm": 2.2967021465301514, "learning_rate": 5.8383697759323045e-05, "loss": 1.1804, "step": 18081 }, { "epoch": 0.6475549268537254, "grad_norm": 1.5080279111862183, "learning_rate": 5.8373151172600207e-05, "loss": 1.2792, "step": 18082 }, { "epoch": 0.6475907389832937, "grad_norm": 1.3563507795333862, "learning_rate": 5.836260514591287e-05, "loss": 1.6779, "step": 18083 }, { "epoch": 0.6476265511128619, "grad_norm": 1.6162803173065186, "learning_rate": 5.8352059679402994e-05, "loss": 1.4045, "step": 18084 }, { "epoch": 0.6476623632424302, "grad_norm": 1.5351731777191162, "learning_rate": 5.834151477321242e-05, "loss": 1.2639, "step": 18085 }, { "epoch": 0.6476981753719985, "grad_norm": 1.8498332500457764, "learning_rate": 5.833097042748308e-05, "loss": 1.5596, "step": 18086 }, { "epoch": 0.6477339875015667, "grad_norm": 1.558227777481079, "learning_rate": 5.832042664235673e-05, "loss": 1.3214, "step": 18087 }, { "epoch": 0.647769799631135, "grad_norm": 1.8643543720245361, "learning_rate": 5.8309883417975275e-05, "loss": 1.2727, "step": 18088 }, { "epoch": 0.6478056117607034, "grad_norm": 1.2156808376312256, "learning_rate": 5.829934075448058e-05, "loss": 1.1644, "step": 18089 }, { "epoch": 0.6478414238902717, "grad_norm": 1.5507395267486572, "learning_rate": 5.8288798652014485e-05, "loss": 1.4555, "step": 18090 }, { "epoch": 0.6478772360198399, "grad_norm": 1.6052091121673584, "learning_rate": 5.827825711071877e-05, "loss": 1.2833, "step": 18091 }, { "epoch": 0.6479130481494082, "grad_norm": 1.7828127145767212, "learning_rate": 5.8267716130735295e-05, "loss": 1.452, "step": 18092 }, { "epoch": 0.6479488602789765, "grad_norm": 2.0876612663269043, "learning_rate": 5.82571757122059e-05, "loss": 1.4555, "step": 18093 }, { "epoch": 0.6479846724085447, "grad_norm": 1.3170816898345947, "learning_rate": 5.824663585527232e-05, "loss": 1.533, "step": 18094 }, { "epoch": 0.648020484538113, "grad_norm": 1.6874192953109741, "learning_rate": 5.8236096560076405e-05, "loss": 1.3156, "step": 18095 }, { "epoch": 0.6480562966676814, "grad_norm": 1.6135790348052979, "learning_rate": 5.8225557826759935e-05, "loss": 1.5129, "step": 18096 }, { "epoch": 0.6480921087972497, "grad_norm": 1.4755977392196655, "learning_rate": 5.821501965546474e-05, "loss": 1.4064, "step": 18097 }, { "epoch": 0.6481279209268179, "grad_norm": 2.032099723815918, "learning_rate": 5.820448204633251e-05, "loss": 1.5011, "step": 18098 }, { "epoch": 0.6481637330563862, "grad_norm": 1.5562313795089722, "learning_rate": 5.819394499950508e-05, "loss": 1.4597, "step": 18099 }, { "epoch": 0.6481995451859545, "grad_norm": 1.6698648929595947, "learning_rate": 5.8183408515124216e-05, "loss": 1.4674, "step": 18100 }, { "epoch": 0.6482353573155227, "grad_norm": 1.8499637842178345, "learning_rate": 5.817287259333162e-05, "loss": 1.574, "step": 18101 }, { "epoch": 0.648271169445091, "grad_norm": 2.0695765018463135, "learning_rate": 5.816233723426907e-05, "loss": 1.3691, "step": 18102 }, { "epoch": 0.6483069815746594, "grad_norm": 1.5621305704116821, "learning_rate": 5.81518024380783e-05, "loss": 1.9263, "step": 18103 }, { "epoch": 0.6483427937042276, "grad_norm": 1.3065502643585205, "learning_rate": 5.814126820490109e-05, "loss": 1.5336, "step": 18104 }, { "epoch": 0.6483786058337959, "grad_norm": 1.9310072660446167, "learning_rate": 5.8130734534879075e-05, "loss": 1.8141, "step": 18105 }, { "epoch": 0.6484144179633642, "grad_norm": 2.5569870471954346, "learning_rate": 5.812020142815403e-05, "loss": 1.4777, "step": 18106 }, { "epoch": 0.6484502300929325, "grad_norm": 1.4359135627746582, "learning_rate": 5.810966888486768e-05, "loss": 1.0495, "step": 18107 }, { "epoch": 0.6484860422225007, "grad_norm": 1.7345796823501587, "learning_rate": 5.809913690516169e-05, "loss": 1.1905, "step": 18108 }, { "epoch": 0.648521854352069, "grad_norm": 1.5571644306182861, "learning_rate": 5.808860548917778e-05, "loss": 1.4347, "step": 18109 }, { "epoch": 0.6485576664816374, "grad_norm": 2.2641749382019043, "learning_rate": 5.807807463705754e-05, "loss": 1.3108, "step": 18110 }, { "epoch": 0.6485934786112056, "grad_norm": 1.5269492864608765, "learning_rate": 5.8067544348942825e-05, "loss": 1.6782, "step": 18111 }, { "epoch": 0.6486292907407739, "grad_norm": 1.2696325778961182, "learning_rate": 5.805701462497517e-05, "loss": 1.6576, "step": 18112 }, { "epoch": 0.6486651028703422, "grad_norm": 2.062124252319336, "learning_rate": 5.804648546529627e-05, "loss": 1.7734, "step": 18113 }, { "epoch": 0.6487009149999104, "grad_norm": 1.5553981065750122, "learning_rate": 5.803595687004779e-05, "loss": 1.5541, "step": 18114 }, { "epoch": 0.6487367271294787, "grad_norm": 2.4918081760406494, "learning_rate": 5.802542883937143e-05, "loss": 1.7596, "step": 18115 }, { "epoch": 0.648772539259047, "grad_norm": 2.098888635635376, "learning_rate": 5.801490137340879e-05, "loss": 1.3388, "step": 18116 }, { "epoch": 0.6488083513886154, "grad_norm": 1.448175311088562, "learning_rate": 5.80043744723014e-05, "loss": 1.3633, "step": 18117 }, { "epoch": 0.6488441635181836, "grad_norm": 1.5029058456420898, "learning_rate": 5.7993848136191065e-05, "loss": 1.5644, "step": 18118 }, { "epoch": 0.6488799756477519, "grad_norm": 2.209278106689453, "learning_rate": 5.7983322365219287e-05, "loss": 1.5103, "step": 18119 }, { "epoch": 0.6489157877773202, "grad_norm": 1.7898609638214111, "learning_rate": 5.797279715952774e-05, "loss": 1.6017, "step": 18120 }, { "epoch": 0.6489515999068884, "grad_norm": 2.122300624847412, "learning_rate": 5.796227251925792e-05, "loss": 1.7573, "step": 18121 }, { "epoch": 0.6489874120364567, "grad_norm": 1.6340723037719727, "learning_rate": 5.795174844455157e-05, "loss": 1.5623, "step": 18122 }, { "epoch": 0.649023224166025, "grad_norm": 1.4408314228057861, "learning_rate": 5.7941224935550166e-05, "loss": 1.5066, "step": 18123 }, { "epoch": 0.6490590362955934, "grad_norm": 1.8224083185195923, "learning_rate": 5.793070199239534e-05, "loss": 1.2898, "step": 18124 }, { "epoch": 0.6490948484251616, "grad_norm": 1.6745611429214478, "learning_rate": 5.7920179615228684e-05, "loss": 1.5694, "step": 18125 }, { "epoch": 0.6491306605547299, "grad_norm": 1.6290042400360107, "learning_rate": 5.790965780419171e-05, "loss": 1.2513, "step": 18126 }, { "epoch": 0.6491664726842982, "grad_norm": 1.885898232460022, "learning_rate": 5.7899136559426015e-05, "loss": 1.334, "step": 18127 }, { "epoch": 0.6492022848138664, "grad_norm": 1.878179669380188, "learning_rate": 5.788861588107306e-05, "loss": 1.6988, "step": 18128 }, { "epoch": 0.6492380969434347, "grad_norm": 2.0154190063476562, "learning_rate": 5.787809576927454e-05, "loss": 1.3509, "step": 18129 }, { "epoch": 0.649273909073003, "grad_norm": 1.7065162658691406, "learning_rate": 5.786757622417187e-05, "loss": 1.6076, "step": 18130 }, { "epoch": 0.6493097212025714, "grad_norm": 1.7891151905059814, "learning_rate": 5.7857057245906656e-05, "loss": 1.5914, "step": 18131 }, { "epoch": 0.6493455333321396, "grad_norm": 1.5112416744232178, "learning_rate": 5.784653883462029e-05, "loss": 1.3508, "step": 18132 }, { "epoch": 0.6493813454617079, "grad_norm": 1.3441530466079712, "learning_rate": 5.7836020990454444e-05, "loss": 1.4401, "step": 18133 }, { "epoch": 0.6494171575912762, "grad_norm": 1.4599716663360596, "learning_rate": 5.7825503713550555e-05, "loss": 1.3342, "step": 18134 }, { "epoch": 0.6494529697208444, "grad_norm": 1.3666131496429443, "learning_rate": 5.7814987004050084e-05, "loss": 1.2142, "step": 18135 }, { "epoch": 0.6494887818504127, "grad_norm": 1.5471409559249878, "learning_rate": 5.780447086209453e-05, "loss": 1.1893, "step": 18136 }, { "epoch": 0.649524593979981, "grad_norm": 1.7935959100723267, "learning_rate": 5.779395528782541e-05, "loss": 1.6171, "step": 18137 }, { "epoch": 0.6495604061095493, "grad_norm": 2.097639799118042, "learning_rate": 5.7783440281384205e-05, "loss": 1.445, "step": 18138 }, { "epoch": 0.6495962182391176, "grad_norm": 1.3056614398956299, "learning_rate": 5.777292584291227e-05, "loss": 1.4843, "step": 18139 }, { "epoch": 0.6496320303686859, "grad_norm": 1.3585635423660278, "learning_rate": 5.7762411972551254e-05, "loss": 1.2193, "step": 18140 }, { "epoch": 0.6496678424982542, "grad_norm": 1.2631720304489136, "learning_rate": 5.775189867044244e-05, "loss": 1.4512, "step": 18141 }, { "epoch": 0.6497036546278224, "grad_norm": 1.6314293146133423, "learning_rate": 5.7741385936727375e-05, "loss": 1.6188, "step": 18142 }, { "epoch": 0.6497394667573907, "grad_norm": 1.9369200468063354, "learning_rate": 5.7730873771547423e-05, "loss": 1.5405, "step": 18143 }, { "epoch": 0.649775278886959, "grad_norm": 1.9945943355560303, "learning_rate": 5.772036217504404e-05, "loss": 1.4426, "step": 18144 }, { "epoch": 0.6498110910165273, "grad_norm": 1.2491214275360107, "learning_rate": 5.770985114735868e-05, "loss": 1.3435, "step": 18145 }, { "epoch": 0.6498469031460956, "grad_norm": 1.999806523323059, "learning_rate": 5.76993406886327e-05, "loss": 1.4863, "step": 18146 }, { "epoch": 0.6498827152756639, "grad_norm": 1.5527349710464478, "learning_rate": 5.768883079900751e-05, "loss": 1.517, "step": 18147 }, { "epoch": 0.6499185274052321, "grad_norm": 1.8263483047485352, "learning_rate": 5.767832147862452e-05, "loss": 1.562, "step": 18148 }, { "epoch": 0.6499543395348004, "grad_norm": 1.532959222793579, "learning_rate": 5.7667812727625184e-05, "loss": 1.107, "step": 18149 }, { "epoch": 0.6499901516643687, "grad_norm": 1.593616008758545, "learning_rate": 5.765730454615072e-05, "loss": 1.4869, "step": 18150 }, { "epoch": 0.650025963793937, "grad_norm": 1.7059237957000732, "learning_rate": 5.764679693434269e-05, "loss": 1.5631, "step": 18151 }, { "epoch": 0.6500617759235053, "grad_norm": 2.210345506668091, "learning_rate": 5.763628989234238e-05, "loss": 1.4786, "step": 18152 }, { "epoch": 0.6500975880530736, "grad_norm": 1.792277455329895, "learning_rate": 5.76257834202911e-05, "loss": 1.3773, "step": 18153 }, { "epoch": 0.6501334001826419, "grad_norm": 1.5056220293045044, "learning_rate": 5.761527751833026e-05, "loss": 1.4831, "step": 18154 }, { "epoch": 0.6501692123122101, "grad_norm": 2.4533936977386475, "learning_rate": 5.760477218660119e-05, "loss": 1.5878, "step": 18155 }, { "epoch": 0.6502050244417784, "grad_norm": 1.424676537513733, "learning_rate": 5.759426742524524e-05, "loss": 1.2297, "step": 18156 }, { "epoch": 0.6502408365713467, "grad_norm": 1.37893807888031, "learning_rate": 5.75837632344037e-05, "loss": 1.6512, "step": 18157 }, { "epoch": 0.650276648700915, "grad_norm": 1.7505850791931152, "learning_rate": 5.757325961421791e-05, "loss": 1.4344, "step": 18158 }, { "epoch": 0.6503124608304833, "grad_norm": 1.5535842180252075, "learning_rate": 5.756275656482918e-05, "loss": 1.3422, "step": 18159 }, { "epoch": 0.6503482729600516, "grad_norm": 1.5393567085266113, "learning_rate": 5.7552254086378863e-05, "loss": 1.4882, "step": 18160 }, { "epoch": 0.6503840850896199, "grad_norm": 1.7456070184707642, "learning_rate": 5.754175217900817e-05, "loss": 1.766, "step": 18161 }, { "epoch": 0.6504198972191881, "grad_norm": 2.0329902172088623, "learning_rate": 5.753125084285844e-05, "loss": 1.5138, "step": 18162 }, { "epoch": 0.6504557093487564, "grad_norm": 1.3546897172927856, "learning_rate": 5.752075007807098e-05, "loss": 1.5011, "step": 18163 }, { "epoch": 0.6504915214783247, "grad_norm": 1.7984650135040283, "learning_rate": 5.751024988478701e-05, "loss": 1.5396, "step": 18164 }, { "epoch": 0.6505273336078929, "grad_norm": 1.3731608390808105, "learning_rate": 5.749975026314781e-05, "loss": 1.4057, "step": 18165 }, { "epoch": 0.6505631457374613, "grad_norm": 2.18913197517395, "learning_rate": 5.748925121329465e-05, "loss": 1.5947, "step": 18166 }, { "epoch": 0.6505989578670296, "grad_norm": 1.4021323919296265, "learning_rate": 5.747875273536882e-05, "loss": 1.316, "step": 18167 }, { "epoch": 0.6506347699965979, "grad_norm": 1.6417192220687866, "learning_rate": 5.746825482951148e-05, "loss": 1.4414, "step": 18168 }, { "epoch": 0.6506705821261661, "grad_norm": 2.237081289291382, "learning_rate": 5.7457757495863916e-05, "loss": 1.2582, "step": 18169 }, { "epoch": 0.6507063942557344, "grad_norm": 1.3216358423233032, "learning_rate": 5.744726073456739e-05, "loss": 1.7212, "step": 18170 }, { "epoch": 0.6507422063853027, "grad_norm": 1.4945186376571655, "learning_rate": 5.7436764545763034e-05, "loss": 1.6712, "step": 18171 }, { "epoch": 0.6507780185148709, "grad_norm": 2.1347923278808594, "learning_rate": 5.7426268929592105e-05, "loss": 1.6936, "step": 18172 }, { "epoch": 0.6508138306444393, "grad_norm": 1.752018690109253, "learning_rate": 5.7415773886195834e-05, "loss": 1.61, "step": 18173 }, { "epoch": 0.6508496427740076, "grad_norm": 1.5680664777755737, "learning_rate": 5.740527941571541e-05, "loss": 1.3054, "step": 18174 }, { "epoch": 0.6508854549035759, "grad_norm": 1.7406561374664307, "learning_rate": 5.739478551829198e-05, "loss": 1.3754, "step": 18175 }, { "epoch": 0.6509212670331441, "grad_norm": 1.6612472534179688, "learning_rate": 5.738429219406676e-05, "loss": 1.2253, "step": 18176 }, { "epoch": 0.6509570791627124, "grad_norm": 1.6799381971359253, "learning_rate": 5.7373799443180906e-05, "loss": 1.5481, "step": 18177 }, { "epoch": 0.6509928912922807, "grad_norm": 2.118349313735962, "learning_rate": 5.7363307265775635e-05, "loss": 1.381, "step": 18178 }, { "epoch": 0.6510287034218489, "grad_norm": 1.8382169008255005, "learning_rate": 5.7352815661992046e-05, "loss": 1.4768, "step": 18179 }, { "epoch": 0.6510645155514173, "grad_norm": 1.4919451475143433, "learning_rate": 5.734232463197129e-05, "loss": 1.1821, "step": 18180 }, { "epoch": 0.6511003276809856, "grad_norm": 1.5245254039764404, "learning_rate": 5.7331834175854596e-05, "loss": 1.4771, "step": 18181 }, { "epoch": 0.6511361398105538, "grad_norm": 1.4875792264938354, "learning_rate": 5.732134429378297e-05, "loss": 1.3327, "step": 18182 }, { "epoch": 0.6511719519401221, "grad_norm": 2.057316303253174, "learning_rate": 5.731085498589761e-05, "loss": 1.2137, "step": 18183 }, { "epoch": 0.6512077640696904, "grad_norm": 1.833107352256775, "learning_rate": 5.730036625233963e-05, "loss": 2.0461, "step": 18184 }, { "epoch": 0.6512435761992587, "grad_norm": 1.508131742477417, "learning_rate": 5.728987809325019e-05, "loss": 1.6103, "step": 18185 }, { "epoch": 0.6512793883288269, "grad_norm": 1.5430577993392944, "learning_rate": 5.727939050877031e-05, "loss": 1.5925, "step": 18186 }, { "epoch": 0.6513152004583953, "grad_norm": 2.0990476608276367, "learning_rate": 5.726890349904113e-05, "loss": 1.5422, "step": 18187 }, { "epoch": 0.6513510125879636, "grad_norm": 2.0384583473205566, "learning_rate": 5.725841706420376e-05, "loss": 1.45, "step": 18188 }, { "epoch": 0.6513868247175318, "grad_norm": 1.6039574146270752, "learning_rate": 5.724793120439923e-05, "loss": 1.7583, "step": 18189 }, { "epoch": 0.6514226368471001, "grad_norm": 1.6907927989959717, "learning_rate": 5.723744591976863e-05, "loss": 1.2868, "step": 18190 }, { "epoch": 0.6514584489766684, "grad_norm": 1.6348273754119873, "learning_rate": 5.722696121045303e-05, "loss": 1.482, "step": 18191 }, { "epoch": 0.6514942611062366, "grad_norm": 1.6397366523742676, "learning_rate": 5.7216477076593544e-05, "loss": 1.4915, "step": 18192 }, { "epoch": 0.6515300732358049, "grad_norm": 1.57917058467865, "learning_rate": 5.7205993518331134e-05, "loss": 1.2427, "step": 18193 }, { "epoch": 0.6515658853653733, "grad_norm": 1.8741562366485596, "learning_rate": 5.719551053580687e-05, "loss": 1.4965, "step": 18194 }, { "epoch": 0.6516016974949416, "grad_norm": 1.567555546760559, "learning_rate": 5.718502812916186e-05, "loss": 1.158, "step": 18195 }, { "epoch": 0.6516375096245098, "grad_norm": 1.4905515909194946, "learning_rate": 5.7174546298537005e-05, "loss": 1.5231, "step": 18196 }, { "epoch": 0.6516733217540781, "grad_norm": 1.853344202041626, "learning_rate": 5.71640650440734e-05, "loss": 1.7658, "step": 18197 }, { "epoch": 0.6517091338836464, "grad_norm": 2.142179250717163, "learning_rate": 5.715358436591205e-05, "loss": 1.9089, "step": 18198 }, { "epoch": 0.6517449460132146, "grad_norm": 2.0253007411956787, "learning_rate": 5.7143104264193984e-05, "loss": 1.3156, "step": 18199 }, { "epoch": 0.6517807581427829, "grad_norm": 1.5804954767227173, "learning_rate": 5.7132624739060134e-05, "loss": 1.1425, "step": 18200 }, { "epoch": 0.6518165702723513, "grad_norm": 1.8263784646987915, "learning_rate": 5.712214579065152e-05, "loss": 1.3975, "step": 18201 }, { "epoch": 0.6518523824019196, "grad_norm": 1.8882803916931152, "learning_rate": 5.711166741910912e-05, "loss": 1.453, "step": 18202 }, { "epoch": 0.6518881945314878, "grad_norm": 1.7997485399246216, "learning_rate": 5.710118962457396e-05, "loss": 1.3057, "step": 18203 }, { "epoch": 0.6519240066610561, "grad_norm": 1.5908443927764893, "learning_rate": 5.709071240718695e-05, "loss": 1.2405, "step": 18204 }, { "epoch": 0.6519598187906244, "grad_norm": 1.749822974205017, "learning_rate": 5.7080235767088994e-05, "loss": 1.4987, "step": 18205 }, { "epoch": 0.6519956309201926, "grad_norm": 1.3174817562103271, "learning_rate": 5.706975970442117e-05, "loss": 1.3869, "step": 18206 }, { "epoch": 0.6520314430497609, "grad_norm": 1.5723850727081299, "learning_rate": 5.7059284219324315e-05, "loss": 1.6457, "step": 18207 }, { "epoch": 0.6520672551793293, "grad_norm": 1.735958218574524, "learning_rate": 5.7048809311939446e-05, "loss": 1.3457, "step": 18208 }, { "epoch": 0.6521030673088976, "grad_norm": 1.7627031803131104, "learning_rate": 5.703833498240736e-05, "loss": 1.6223, "step": 18209 }, { "epoch": 0.6521388794384658, "grad_norm": 2.1572265625, "learning_rate": 5.702786123086914e-05, "loss": 1.2937, "step": 18210 }, { "epoch": 0.6521746915680341, "grad_norm": 1.6451489925384521, "learning_rate": 5.701738805746558e-05, "loss": 1.5857, "step": 18211 }, { "epoch": 0.6522105036976024, "grad_norm": 1.494231104850769, "learning_rate": 5.700691546233762e-05, "loss": 1.493, "step": 18212 }, { "epoch": 0.6522463158271706, "grad_norm": 1.4229929447174072, "learning_rate": 5.699644344562619e-05, "loss": 1.355, "step": 18213 }, { "epoch": 0.6522821279567389, "grad_norm": 1.5671422481536865, "learning_rate": 5.698597200747211e-05, "loss": 1.2931, "step": 18214 }, { "epoch": 0.6523179400863073, "grad_norm": 1.9639018774032593, "learning_rate": 5.697550114801633e-05, "loss": 1.4782, "step": 18215 }, { "epoch": 0.6523537522158755, "grad_norm": 1.4121224880218506, "learning_rate": 5.696503086739961e-05, "loss": 1.2247, "step": 18216 }, { "epoch": 0.6523895643454438, "grad_norm": 2.129319429397583, "learning_rate": 5.695456116576296e-05, "loss": 1.4432, "step": 18217 }, { "epoch": 0.6524253764750121, "grad_norm": 1.39368736743927, "learning_rate": 5.6944092043247124e-05, "loss": 1.3975, "step": 18218 }, { "epoch": 0.6524611886045804, "grad_norm": 1.3888850212097168, "learning_rate": 5.693362349999303e-05, "loss": 1.5083, "step": 18219 }, { "epoch": 0.6524970007341486, "grad_norm": 1.4592702388763428, "learning_rate": 5.6923155536141404e-05, "loss": 1.449, "step": 18220 }, { "epoch": 0.6525328128637169, "grad_norm": 2.164543628692627, "learning_rate": 5.691268815183324e-05, "loss": 1.2571, "step": 18221 }, { "epoch": 0.6525686249932853, "grad_norm": 2.2034432888031006, "learning_rate": 5.690222134720927e-05, "loss": 1.562, "step": 18222 }, { "epoch": 0.6526044371228535, "grad_norm": 1.5978227853775024, "learning_rate": 5.6891755122410254e-05, "loss": 1.0764, "step": 18223 }, { "epoch": 0.6526402492524218, "grad_norm": 1.8049519062042236, "learning_rate": 5.688128947757713e-05, "loss": 1.4481, "step": 18224 }, { "epoch": 0.6526760613819901, "grad_norm": 1.7930026054382324, "learning_rate": 5.687082441285061e-05, "loss": 1.5977, "step": 18225 }, { "epoch": 0.6527118735115583, "grad_norm": 1.4128412008285522, "learning_rate": 5.6860359928371546e-05, "loss": 1.7345, "step": 18226 }, { "epoch": 0.6527476856411266, "grad_norm": 1.6190688610076904, "learning_rate": 5.6849896024280614e-05, "loss": 1.2488, "step": 18227 }, { "epoch": 0.6527834977706949, "grad_norm": 2.397874593734741, "learning_rate": 5.6839432700718743e-05, "loss": 1.2673, "step": 18228 }, { "epoch": 0.6528193099002633, "grad_norm": 2.7797553539276123, "learning_rate": 5.682896995782661e-05, "loss": 1.5766, "step": 18229 }, { "epoch": 0.6528551220298315, "grad_norm": 2.0291695594787598, "learning_rate": 5.6818507795745025e-05, "loss": 1.2149, "step": 18230 }, { "epoch": 0.6528909341593998, "grad_norm": 1.5554500818252563, "learning_rate": 5.6808046214614684e-05, "loss": 1.534, "step": 18231 }, { "epoch": 0.6529267462889681, "grad_norm": 1.7645570039749146, "learning_rate": 5.679758521457637e-05, "loss": 1.5774, "step": 18232 }, { "epoch": 0.6529625584185363, "grad_norm": 1.3924230337142944, "learning_rate": 5.678712479577086e-05, "loss": 1.1851, "step": 18233 }, { "epoch": 0.6529983705481046, "grad_norm": 1.7153980731964111, "learning_rate": 5.67766649583388e-05, "loss": 1.3035, "step": 18234 }, { "epoch": 0.6530341826776729, "grad_norm": 1.4884206056594849, "learning_rate": 5.676620570242097e-05, "loss": 1.3867, "step": 18235 }, { "epoch": 0.6530699948072413, "grad_norm": 2.1972150802612305, "learning_rate": 5.675574702815807e-05, "loss": 1.7816, "step": 18236 }, { "epoch": 0.6531058069368095, "grad_norm": 2.568058967590332, "learning_rate": 5.674528893569084e-05, "loss": 1.2583, "step": 18237 }, { "epoch": 0.6531416190663778, "grad_norm": 1.8737038373947144, "learning_rate": 5.673483142515988e-05, "loss": 1.2972, "step": 18238 }, { "epoch": 0.6531774311959461, "grad_norm": 1.451999545097351, "learning_rate": 5.672437449670605e-05, "loss": 1.5523, "step": 18239 }, { "epoch": 0.6532132433255143, "grad_norm": 1.7182345390319824, "learning_rate": 5.6713918150469916e-05, "loss": 1.3202, "step": 18240 }, { "epoch": 0.6532490554550826, "grad_norm": 2.7142200469970703, "learning_rate": 5.6703462386592145e-05, "loss": 1.6818, "step": 18241 }, { "epoch": 0.6532848675846509, "grad_norm": 1.4753506183624268, "learning_rate": 5.6693007205213444e-05, "loss": 1.4071, "step": 18242 }, { "epoch": 0.6533206797142193, "grad_norm": 1.5137279033660889, "learning_rate": 5.668255260647447e-05, "loss": 1.6646, "step": 18243 }, { "epoch": 0.6533564918437875, "grad_norm": 1.8348652124404907, "learning_rate": 5.667209859051592e-05, "loss": 1.3507, "step": 18244 }, { "epoch": 0.6533923039733558, "grad_norm": 1.683578610420227, "learning_rate": 5.6661645157478336e-05, "loss": 1.1708, "step": 18245 }, { "epoch": 0.6534281161029241, "grad_norm": 1.2713277339935303, "learning_rate": 5.665119230750243e-05, "loss": 1.412, "step": 18246 }, { "epoch": 0.6534639282324923, "grad_norm": 1.712204098701477, "learning_rate": 5.664074004072881e-05, "loss": 1.2872, "step": 18247 }, { "epoch": 0.6534997403620606, "grad_norm": 1.6084846258163452, "learning_rate": 5.663028835729815e-05, "loss": 1.4954, "step": 18248 }, { "epoch": 0.6535355524916289, "grad_norm": 1.7049838304519653, "learning_rate": 5.661983725735096e-05, "loss": 1.5376, "step": 18249 }, { "epoch": 0.6535713646211972, "grad_norm": 2.174365997314453, "learning_rate": 5.6609386741027915e-05, "loss": 1.7882, "step": 18250 }, { "epoch": 0.6536071767507655, "grad_norm": 1.3150783777236938, "learning_rate": 5.659893680846965e-05, "loss": 1.4351, "step": 18251 }, { "epoch": 0.6536429888803338, "grad_norm": 1.477768063545227, "learning_rate": 5.658848745981667e-05, "loss": 1.6592, "step": 18252 }, { "epoch": 0.653678801009902, "grad_norm": 1.5167200565338135, "learning_rate": 5.6578038695209566e-05, "loss": 1.3087, "step": 18253 }, { "epoch": 0.6537146131394703, "grad_norm": 1.7359503507614136, "learning_rate": 5.656759051478897e-05, "loss": 1.5542, "step": 18254 }, { "epoch": 0.6537504252690386, "grad_norm": 1.6979146003723145, "learning_rate": 5.655714291869544e-05, "loss": 1.6755, "step": 18255 }, { "epoch": 0.6537862373986069, "grad_norm": 1.4741054773330688, "learning_rate": 5.654669590706948e-05, "loss": 1.3516, "step": 18256 }, { "epoch": 0.6538220495281752, "grad_norm": 1.841018795967102, "learning_rate": 5.653624948005167e-05, "loss": 1.3925, "step": 18257 }, { "epoch": 0.6538578616577435, "grad_norm": 1.7704675197601318, "learning_rate": 5.6525803637782614e-05, "loss": 1.8108, "step": 18258 }, { "epoch": 0.6538936737873118, "grad_norm": 1.8896334171295166, "learning_rate": 5.651535838040275e-05, "loss": 1.6515, "step": 18259 }, { "epoch": 0.65392948591688, "grad_norm": 1.491990327835083, "learning_rate": 5.6504913708052646e-05, "loss": 1.5978, "step": 18260 }, { "epoch": 0.6539652980464483, "grad_norm": 1.973260521888733, "learning_rate": 5.6494469620872814e-05, "loss": 1.4268, "step": 18261 }, { "epoch": 0.6540011101760166, "grad_norm": 1.601784348487854, "learning_rate": 5.648402611900383e-05, "loss": 1.2062, "step": 18262 }, { "epoch": 0.6540369223055849, "grad_norm": 1.3702232837677002, "learning_rate": 5.647358320258609e-05, "loss": 1.3752, "step": 18263 }, { "epoch": 0.6540727344351532, "grad_norm": 1.3767213821411133, "learning_rate": 5.6463140871760144e-05, "loss": 1.5021, "step": 18264 }, { "epoch": 0.6541085465647215, "grad_norm": 1.3063963651657104, "learning_rate": 5.6452699126666486e-05, "loss": 1.5594, "step": 18265 }, { "epoch": 0.6541443586942898, "grad_norm": 1.5058377981185913, "learning_rate": 5.644225796744562e-05, "loss": 1.166, "step": 18266 }, { "epoch": 0.654180170823858, "grad_norm": 1.676900863647461, "learning_rate": 5.6431817394237964e-05, "loss": 1.589, "step": 18267 }, { "epoch": 0.6542159829534263, "grad_norm": 1.7111408710479736, "learning_rate": 5.6421377407183997e-05, "loss": 1.5941, "step": 18268 }, { "epoch": 0.6542517950829946, "grad_norm": 1.4880335330963135, "learning_rate": 5.641093800642423e-05, "loss": 1.2594, "step": 18269 }, { "epoch": 0.6542876072125628, "grad_norm": 2.1994059085845947, "learning_rate": 5.640049919209902e-05, "loss": 1.6901, "step": 18270 }, { "epoch": 0.6543234193421312, "grad_norm": 1.6333506107330322, "learning_rate": 5.6390060964348845e-05, "loss": 1.4167, "step": 18271 }, { "epoch": 0.6543592314716995, "grad_norm": 1.4062167406082153, "learning_rate": 5.637962332331416e-05, "loss": 1.2515, "step": 18272 }, { "epoch": 0.6543950436012678, "grad_norm": 2.1119511127471924, "learning_rate": 5.636918626913541e-05, "loss": 1.6651, "step": 18273 }, { "epoch": 0.654430855730836, "grad_norm": 1.8894344568252563, "learning_rate": 5.6358749801952946e-05, "loss": 1.7571, "step": 18274 }, { "epoch": 0.6544666678604043, "grad_norm": 1.7292144298553467, "learning_rate": 5.63483139219072e-05, "loss": 1.4696, "step": 18275 }, { "epoch": 0.6545024799899726, "grad_norm": 1.5995243787765503, "learning_rate": 5.633787862913864e-05, "loss": 1.1873, "step": 18276 }, { "epoch": 0.6545382921195408, "grad_norm": 1.5615102052688599, "learning_rate": 5.6327443923787546e-05, "loss": 1.3503, "step": 18277 }, { "epoch": 0.6545741042491092, "grad_norm": 1.3338347673416138, "learning_rate": 5.631700980599437e-05, "loss": 1.4405, "step": 18278 }, { "epoch": 0.6546099163786775, "grad_norm": 1.7192423343658447, "learning_rate": 5.630657627589948e-05, "loss": 1.7277, "step": 18279 }, { "epoch": 0.6546457285082458, "grad_norm": 2.087306499481201, "learning_rate": 5.629614333364328e-05, "loss": 1.3391, "step": 18280 }, { "epoch": 0.654681540637814, "grad_norm": 1.4687906503677368, "learning_rate": 5.628571097936606e-05, "loss": 1.3238, "step": 18281 }, { "epoch": 0.6547173527673823, "grad_norm": 2.546259641647339, "learning_rate": 5.627527921320821e-05, "loss": 1.4288, "step": 18282 }, { "epoch": 0.6547531648969506, "grad_norm": 1.732775092124939, "learning_rate": 5.626484803531008e-05, "loss": 1.4941, "step": 18283 }, { "epoch": 0.6547889770265188, "grad_norm": 1.4935340881347656, "learning_rate": 5.625441744581205e-05, "loss": 1.154, "step": 18284 }, { "epoch": 0.6548247891560872, "grad_norm": 2.029008150100708, "learning_rate": 5.624398744485435e-05, "loss": 1.4768, "step": 18285 }, { "epoch": 0.6548606012856555, "grad_norm": 1.9982153177261353, "learning_rate": 5.623355803257737e-05, "loss": 1.6078, "step": 18286 }, { "epoch": 0.6548964134152238, "grad_norm": 1.5812995433807373, "learning_rate": 5.622312920912145e-05, "loss": 1.6001, "step": 18287 }, { "epoch": 0.654932225544792, "grad_norm": 2.2737843990325928, "learning_rate": 5.621270097462682e-05, "loss": 1.7967, "step": 18288 }, { "epoch": 0.6549680376743603, "grad_norm": 2.59417724609375, "learning_rate": 5.620227332923382e-05, "loss": 1.2416, "step": 18289 }, { "epoch": 0.6550038498039286, "grad_norm": 1.4791409969329834, "learning_rate": 5.619184627308273e-05, "loss": 1.3457, "step": 18290 }, { "epoch": 0.6550396619334968, "grad_norm": 1.6634405851364136, "learning_rate": 5.618141980631389e-05, "loss": 1.632, "step": 18291 }, { "epoch": 0.6550754740630652, "grad_norm": 2.015676975250244, "learning_rate": 5.617099392906751e-05, "loss": 1.3177, "step": 18292 }, { "epoch": 0.6551112861926335, "grad_norm": 1.8615721464157104, "learning_rate": 5.61605686414838e-05, "loss": 1.6381, "step": 18293 }, { "epoch": 0.6551470983222017, "grad_norm": 1.3926013708114624, "learning_rate": 5.615014394370317e-05, "loss": 0.9642, "step": 18294 }, { "epoch": 0.65518291045177, "grad_norm": 1.9081752300262451, "learning_rate": 5.6139719835865745e-05, "loss": 1.5377, "step": 18295 }, { "epoch": 0.6552187225813383, "grad_norm": 1.6380687952041626, "learning_rate": 5.612929631811181e-05, "loss": 1.3983, "step": 18296 }, { "epoch": 0.6552545347109066, "grad_norm": 1.7119892835617065, "learning_rate": 5.611887339058162e-05, "loss": 1.6677, "step": 18297 }, { "epoch": 0.6552903468404748, "grad_norm": 1.7411001920700073, "learning_rate": 5.610845105341542e-05, "loss": 1.6975, "step": 18298 }, { "epoch": 0.6553261589700432, "grad_norm": 2.2733662128448486, "learning_rate": 5.609802930675335e-05, "loss": 1.448, "step": 18299 }, { "epoch": 0.6553619710996115, "grad_norm": 3.091132640838623, "learning_rate": 5.608760815073567e-05, "loss": 1.3997, "step": 18300 }, { "epoch": 0.6553977832291797, "grad_norm": 1.3527662754058838, "learning_rate": 5.6077187585502624e-05, "loss": 1.2382, "step": 18301 }, { "epoch": 0.655433595358748, "grad_norm": 1.2386977672576904, "learning_rate": 5.6066767611194316e-05, "loss": 1.4487, "step": 18302 }, { "epoch": 0.6554694074883163, "grad_norm": 2.7018849849700928, "learning_rate": 5.6056348227951025e-05, "loss": 1.5491, "step": 18303 }, { "epoch": 0.6555052196178845, "grad_norm": 1.7741349935531616, "learning_rate": 5.6045929435912805e-05, "loss": 1.446, "step": 18304 }, { "epoch": 0.6555410317474528, "grad_norm": 1.929344892501831, "learning_rate": 5.603551123521997e-05, "loss": 1.2201, "step": 18305 }, { "epoch": 0.6555768438770212, "grad_norm": 1.3161699771881104, "learning_rate": 5.60250936260126e-05, "loss": 1.4935, "step": 18306 }, { "epoch": 0.6556126560065895, "grad_norm": 1.606744647026062, "learning_rate": 5.601467660843087e-05, "loss": 1.2209, "step": 18307 }, { "epoch": 0.6556484681361577, "grad_norm": 1.6163558959960938, "learning_rate": 5.600426018261493e-05, "loss": 1.7188, "step": 18308 }, { "epoch": 0.655684280265726, "grad_norm": 1.3618152141571045, "learning_rate": 5.599384434870496e-05, "loss": 1.2453, "step": 18309 }, { "epoch": 0.6557200923952943, "grad_norm": 1.6127922534942627, "learning_rate": 5.5983429106841046e-05, "loss": 1.5509, "step": 18310 }, { "epoch": 0.6557559045248625, "grad_norm": 1.6013727188110352, "learning_rate": 5.597301445716323e-05, "loss": 1.6438, "step": 18311 }, { "epoch": 0.6557917166544308, "grad_norm": 1.5582809448242188, "learning_rate": 5.59626003998118e-05, "loss": 1.4424, "step": 18312 }, { "epoch": 0.6558275287839992, "grad_norm": 1.5921964645385742, "learning_rate": 5.595218693492674e-05, "loss": 1.3418, "step": 18313 }, { "epoch": 0.6558633409135675, "grad_norm": 1.9930670261383057, "learning_rate": 5.594177406264822e-05, "loss": 1.6266, "step": 18314 }, { "epoch": 0.6558991530431357, "grad_norm": 1.9355586767196655, "learning_rate": 5.593136178311622e-05, "loss": 1.26, "step": 18315 }, { "epoch": 0.655934965172704, "grad_norm": 2.1823208332061768, "learning_rate": 5.592095009647099e-05, "loss": 1.57, "step": 18316 }, { "epoch": 0.6559707773022723, "grad_norm": 1.3704748153686523, "learning_rate": 5.591053900285248e-05, "loss": 1.3415, "step": 18317 }, { "epoch": 0.6560065894318405, "grad_norm": 1.7008016109466553, "learning_rate": 5.590012850240083e-05, "loss": 1.5811, "step": 18318 }, { "epoch": 0.6560424015614088, "grad_norm": 2.976754665374756, "learning_rate": 5.5889718595256026e-05, "loss": 1.6042, "step": 18319 }, { "epoch": 0.6560782136909772, "grad_norm": 1.7134968042373657, "learning_rate": 5.587930928155816e-05, "loss": 1.2071, "step": 18320 }, { "epoch": 0.6561140258205455, "grad_norm": 1.4464317560195923, "learning_rate": 5.586890056144732e-05, "loss": 1.6427, "step": 18321 }, { "epoch": 0.6561498379501137, "grad_norm": 2.000373125076294, "learning_rate": 5.585849243506342e-05, "loss": 1.3813, "step": 18322 }, { "epoch": 0.656185650079682, "grad_norm": 1.743994951248169, "learning_rate": 5.584808490254664e-05, "loss": 1.4915, "step": 18323 }, { "epoch": 0.6562214622092503, "grad_norm": 1.9050312042236328, "learning_rate": 5.5837677964036894e-05, "loss": 1.6474, "step": 18324 }, { "epoch": 0.6562572743388185, "grad_norm": 1.8708951473236084, "learning_rate": 5.582727161967425e-05, "loss": 1.3392, "step": 18325 }, { "epoch": 0.6562930864683868, "grad_norm": 1.4769537448883057, "learning_rate": 5.5816865869598625e-05, "loss": 1.4556, "step": 18326 }, { "epoch": 0.6563288985979552, "grad_norm": 1.4558557271957397, "learning_rate": 5.5806460713950145e-05, "loss": 1.4495, "step": 18327 }, { "epoch": 0.6563647107275234, "grad_norm": 2.0436086654663086, "learning_rate": 5.579605615286874e-05, "loss": 1.5254, "step": 18328 }, { "epoch": 0.6564005228570917, "grad_norm": 1.4093581438064575, "learning_rate": 5.578565218649433e-05, "loss": 1.4388, "step": 18329 }, { "epoch": 0.65643633498666, "grad_norm": 1.895922064781189, "learning_rate": 5.577524881496694e-05, "loss": 1.2775, "step": 18330 }, { "epoch": 0.6564721471162283, "grad_norm": 2.0171585083007812, "learning_rate": 5.5764846038426535e-05, "loss": 1.3864, "step": 18331 }, { "epoch": 0.6565079592457965, "grad_norm": 2.5566394329071045, "learning_rate": 5.57544438570131e-05, "loss": 1.6843, "step": 18332 }, { "epoch": 0.6565437713753648, "grad_norm": 1.510467290878296, "learning_rate": 5.574404227086648e-05, "loss": 1.4668, "step": 18333 }, { "epoch": 0.6565795835049332, "grad_norm": 1.726326584815979, "learning_rate": 5.573364128012677e-05, "loss": 1.2047, "step": 18334 }, { "epoch": 0.6566153956345014, "grad_norm": 1.3015578985214233, "learning_rate": 5.572324088493377e-05, "loss": 1.3682, "step": 18335 }, { "epoch": 0.6566512077640697, "grad_norm": 2.0535759925842285, "learning_rate": 5.571284108542748e-05, "loss": 1.4943, "step": 18336 }, { "epoch": 0.656687019893638, "grad_norm": 1.3764218091964722, "learning_rate": 5.5702441881747755e-05, "loss": 1.1779, "step": 18337 }, { "epoch": 0.6567228320232062, "grad_norm": 2.7973520755767822, "learning_rate": 5.5692043274034544e-05, "loss": 1.6795, "step": 18338 }, { "epoch": 0.6567586441527745, "grad_norm": 1.9879013299942017, "learning_rate": 5.568164526242776e-05, "loss": 1.4525, "step": 18339 }, { "epoch": 0.6567944562823428, "grad_norm": 1.5268080234527588, "learning_rate": 5.5671247847067254e-05, "loss": 1.7501, "step": 18340 }, { "epoch": 0.6568302684119112, "grad_norm": 1.5404284000396729, "learning_rate": 5.566085102809291e-05, "loss": 1.3821, "step": 18341 }, { "epoch": 0.6568660805414794, "grad_norm": 1.526283860206604, "learning_rate": 5.565045480564463e-05, "loss": 1.3779, "step": 18342 }, { "epoch": 0.6569018926710477, "grad_norm": 1.485095739364624, "learning_rate": 5.5640059179862314e-05, "loss": 1.3956, "step": 18343 }, { "epoch": 0.656937704800616, "grad_norm": 1.6767833232879639, "learning_rate": 5.562966415088574e-05, "loss": 1.6607, "step": 18344 }, { "epoch": 0.6569735169301842, "grad_norm": 1.563358187675476, "learning_rate": 5.5619269718854805e-05, "loss": 1.8258, "step": 18345 }, { "epoch": 0.6570093290597525, "grad_norm": 1.4730910062789917, "learning_rate": 5.560887588390938e-05, "loss": 1.1704, "step": 18346 }, { "epoch": 0.6570451411893208, "grad_norm": 1.5037370920181274, "learning_rate": 5.559848264618923e-05, "loss": 1.1723, "step": 18347 }, { "epoch": 0.6570809533188892, "grad_norm": 1.9317376613616943, "learning_rate": 5.5588090005834224e-05, "loss": 1.3406, "step": 18348 }, { "epoch": 0.6571167654484574, "grad_norm": 1.5945675373077393, "learning_rate": 5.5577697962984195e-05, "loss": 1.4684, "step": 18349 }, { "epoch": 0.6571525775780257, "grad_norm": 1.4103537797927856, "learning_rate": 5.556730651777897e-05, "loss": 1.3841, "step": 18350 }, { "epoch": 0.657188389707594, "grad_norm": 1.6418267488479614, "learning_rate": 5.555691567035828e-05, "loss": 1.4893, "step": 18351 }, { "epoch": 0.6572242018371622, "grad_norm": 1.3882322311401367, "learning_rate": 5.554652542086196e-05, "loss": 1.4669, "step": 18352 }, { "epoch": 0.6572600139667305, "grad_norm": 1.9791117906570435, "learning_rate": 5.5536135769429795e-05, "loss": 1.3963, "step": 18353 }, { "epoch": 0.6572958260962988, "grad_norm": 2.5772440433502197, "learning_rate": 5.552574671620161e-05, "loss": 1.4643, "step": 18354 }, { "epoch": 0.6573316382258672, "grad_norm": 2.2931079864501953, "learning_rate": 5.551535826131711e-05, "loss": 1.4, "step": 18355 }, { "epoch": 0.6573674503554354, "grad_norm": 2.633115768432617, "learning_rate": 5.5504970404916066e-05, "loss": 1.1993, "step": 18356 }, { "epoch": 0.6574032624850037, "grad_norm": 1.4638112783432007, "learning_rate": 5.54945831471383e-05, "loss": 1.5759, "step": 18357 }, { "epoch": 0.657439074614572, "grad_norm": 1.9851884841918945, "learning_rate": 5.548419648812346e-05, "loss": 1.5184, "step": 18358 }, { "epoch": 0.6574748867441402, "grad_norm": 1.440222978591919, "learning_rate": 5.547381042801135e-05, "loss": 1.5711, "step": 18359 }, { "epoch": 0.6575106988737085, "grad_norm": 1.8197391033172607, "learning_rate": 5.5463424966941676e-05, "loss": 1.0896, "step": 18360 }, { "epoch": 0.6575465110032768, "grad_norm": 1.7370043992996216, "learning_rate": 5.545304010505421e-05, "loss": 1.5494, "step": 18361 }, { "epoch": 0.6575823231328451, "grad_norm": 1.4990167617797852, "learning_rate": 5.54426558424886e-05, "loss": 1.5032, "step": 18362 }, { "epoch": 0.6576181352624134, "grad_norm": 2.4379074573516846, "learning_rate": 5.543227217938457e-05, "loss": 1.5814, "step": 18363 }, { "epoch": 0.6576539473919817, "grad_norm": 1.6144640445709229, "learning_rate": 5.5421889115881875e-05, "loss": 1.3925, "step": 18364 }, { "epoch": 0.65768975952155, "grad_norm": 1.5260868072509766, "learning_rate": 5.5411506652120115e-05, "loss": 1.5798, "step": 18365 }, { "epoch": 0.6577255716511182, "grad_norm": 1.313092589378357, "learning_rate": 5.540112478823902e-05, "loss": 1.3203, "step": 18366 }, { "epoch": 0.6577613837806865, "grad_norm": 1.5859086513519287, "learning_rate": 5.5390743524378266e-05, "loss": 1.5731, "step": 18367 }, { "epoch": 0.6577971959102548, "grad_norm": 1.383134365081787, "learning_rate": 5.538036286067756e-05, "loss": 1.347, "step": 18368 }, { "epoch": 0.6578330080398231, "grad_norm": 1.9735016822814941, "learning_rate": 5.5369982797276454e-05, "loss": 1.6849, "step": 18369 }, { "epoch": 0.6578688201693914, "grad_norm": 1.9564305543899536, "learning_rate": 5.5359603334314695e-05, "loss": 1.2313, "step": 18370 }, { "epoch": 0.6579046322989597, "grad_norm": 1.9404182434082031, "learning_rate": 5.534922447193187e-05, "loss": 1.7799, "step": 18371 }, { "epoch": 0.657940444428528, "grad_norm": 1.4757919311523438, "learning_rate": 5.533884621026767e-05, "loss": 1.3568, "step": 18372 }, { "epoch": 0.6579762565580962, "grad_norm": 1.4914617538452148, "learning_rate": 5.5328468549461657e-05, "loss": 1.5968, "step": 18373 }, { "epoch": 0.6580120686876645, "grad_norm": 1.624997615814209, "learning_rate": 5.531809148965347e-05, "loss": 1.6594, "step": 18374 }, { "epoch": 0.6580478808172328, "grad_norm": 1.5839698314666748, "learning_rate": 5.530771503098278e-05, "loss": 1.5126, "step": 18375 }, { "epoch": 0.6580836929468011, "grad_norm": 1.6808476448059082, "learning_rate": 5.529733917358908e-05, "loss": 1.7064, "step": 18376 }, { "epoch": 0.6581195050763694, "grad_norm": 1.3805513381958008, "learning_rate": 5.528696391761201e-05, "loss": 1.4942, "step": 18377 }, { "epoch": 0.6581553172059377, "grad_norm": 1.3921979665756226, "learning_rate": 5.527658926319119e-05, "loss": 1.5189, "step": 18378 }, { "epoch": 0.6581911293355059, "grad_norm": 1.867268681526184, "learning_rate": 5.52662152104662e-05, "loss": 1.3446, "step": 18379 }, { "epoch": 0.6582269414650742, "grad_norm": 1.994785189628601, "learning_rate": 5.5255841759576544e-05, "loss": 1.253, "step": 18380 }, { "epoch": 0.6582627535946425, "grad_norm": 1.367926836013794, "learning_rate": 5.524546891066182e-05, "loss": 1.4459, "step": 18381 }, { "epoch": 0.6582985657242107, "grad_norm": 1.71571946144104, "learning_rate": 5.5235096663861617e-05, "loss": 1.8809, "step": 18382 }, { "epoch": 0.6583343778537791, "grad_norm": 1.36968195438385, "learning_rate": 5.5224725019315416e-05, "loss": 1.5383, "step": 18383 }, { "epoch": 0.6583701899833474, "grad_norm": 2.195829391479492, "learning_rate": 5.521435397716278e-05, "loss": 1.4893, "step": 18384 }, { "epoch": 0.6584060021129157, "grad_norm": 1.5559254884719849, "learning_rate": 5.520398353754324e-05, "loss": 1.3855, "step": 18385 }, { "epoch": 0.6584418142424839, "grad_norm": 1.5635045766830444, "learning_rate": 5.519361370059637e-05, "loss": 1.3387, "step": 18386 }, { "epoch": 0.6584776263720522, "grad_norm": 1.6653882265090942, "learning_rate": 5.518324446646157e-05, "loss": 1.7117, "step": 18387 }, { "epoch": 0.6585134385016205, "grad_norm": 1.4553028345108032, "learning_rate": 5.517287583527843e-05, "loss": 1.4408, "step": 18388 }, { "epoch": 0.6585492506311887, "grad_norm": 1.7703020572662354, "learning_rate": 5.51625078071864e-05, "loss": 1.4806, "step": 18389 }, { "epoch": 0.6585850627607571, "grad_norm": 1.610481858253479, "learning_rate": 5.5152140382325044e-05, "loss": 1.4433, "step": 18390 }, { "epoch": 0.6586208748903254, "grad_norm": 1.5309536457061768, "learning_rate": 5.5141773560833756e-05, "loss": 1.7228, "step": 18391 }, { "epoch": 0.6586566870198937, "grad_norm": 1.8306372165679932, "learning_rate": 5.5131407342852026e-05, "loss": 1.1958, "step": 18392 }, { "epoch": 0.6586924991494619, "grad_norm": 1.701582431793213, "learning_rate": 5.5121041728519386e-05, "loss": 1.4244, "step": 18393 }, { "epoch": 0.6587283112790302, "grad_norm": 1.6949228048324585, "learning_rate": 5.5110676717975194e-05, "loss": 1.3162, "step": 18394 }, { "epoch": 0.6587641234085985, "grad_norm": 1.2450233697891235, "learning_rate": 5.510031231135895e-05, "loss": 1.482, "step": 18395 }, { "epoch": 0.6587999355381667, "grad_norm": 1.6780352592468262, "learning_rate": 5.508994850881008e-05, "loss": 1.4635, "step": 18396 }, { "epoch": 0.6588357476677351, "grad_norm": 2.5002284049987793, "learning_rate": 5.507958531046806e-05, "loss": 1.4665, "step": 18397 }, { "epoch": 0.6588715597973034, "grad_norm": 1.386528730392456, "learning_rate": 5.506922271647228e-05, "loss": 1.7111, "step": 18398 }, { "epoch": 0.6589073719268717, "grad_norm": 1.9130560159683228, "learning_rate": 5.505886072696208e-05, "loss": 1.3824, "step": 18399 }, { "epoch": 0.6589431840564399, "grad_norm": 1.4952514171600342, "learning_rate": 5.504849934207701e-05, "loss": 1.4765, "step": 18400 }, { "epoch": 0.6589789961860082, "grad_norm": 1.4171199798583984, "learning_rate": 5.503813856195637e-05, "loss": 1.3389, "step": 18401 }, { "epoch": 0.6590148083155765, "grad_norm": 1.6343742609024048, "learning_rate": 5.5027778386739606e-05, "loss": 1.488, "step": 18402 }, { "epoch": 0.6590506204451447, "grad_norm": 1.364643931388855, "learning_rate": 5.5017418816565994e-05, "loss": 1.5663, "step": 18403 }, { "epoch": 0.6590864325747131, "grad_norm": 2.7921016216278076, "learning_rate": 5.500705985157508e-05, "loss": 1.5698, "step": 18404 }, { "epoch": 0.6591222447042814, "grad_norm": 1.4503824710845947, "learning_rate": 5.499670149190609e-05, "loss": 1.3045, "step": 18405 }, { "epoch": 0.6591580568338496, "grad_norm": 1.6705472469329834, "learning_rate": 5.498634373769843e-05, "loss": 1.6779, "step": 18406 }, { "epoch": 0.6591938689634179, "grad_norm": 1.5752532482147217, "learning_rate": 5.497598658909149e-05, "loss": 1.4986, "step": 18407 }, { "epoch": 0.6592296810929862, "grad_norm": 1.343363642692566, "learning_rate": 5.496563004622455e-05, "loss": 1.4247, "step": 18408 }, { "epoch": 0.6592654932225545, "grad_norm": 1.5922566652297974, "learning_rate": 5.495527410923699e-05, "loss": 1.5302, "step": 18409 }, { "epoch": 0.6593013053521227, "grad_norm": 1.7536109685897827, "learning_rate": 5.494491877826804e-05, "loss": 1.1251, "step": 18410 }, { "epoch": 0.6593371174816911, "grad_norm": 1.656254768371582, "learning_rate": 5.493456405345716e-05, "loss": 1.4341, "step": 18411 }, { "epoch": 0.6593729296112594, "grad_norm": 1.5095834732055664, "learning_rate": 5.492420993494357e-05, "loss": 1.4502, "step": 18412 }, { "epoch": 0.6594087417408276, "grad_norm": 1.864039421081543, "learning_rate": 5.491385642286662e-05, "loss": 1.2983, "step": 18413 }, { "epoch": 0.6594445538703959, "grad_norm": 1.595781922340393, "learning_rate": 5.49035035173655e-05, "loss": 1.332, "step": 18414 }, { "epoch": 0.6594803659999642, "grad_norm": 1.7632122039794922, "learning_rate": 5.4893151218579655e-05, "loss": 1.5275, "step": 18415 }, { "epoch": 0.6595161781295324, "grad_norm": 1.8122587203979492, "learning_rate": 5.488279952664826e-05, "loss": 1.4587, "step": 18416 }, { "epoch": 0.6595519902591007, "grad_norm": 2.209500789642334, "learning_rate": 5.4872448441710536e-05, "loss": 1.665, "step": 18417 }, { "epoch": 0.659587802388669, "grad_norm": 1.7916383743286133, "learning_rate": 5.4862097963905865e-05, "loss": 1.6607, "step": 18418 }, { "epoch": 0.6596236145182374, "grad_norm": 1.0907057523727417, "learning_rate": 5.485174809337342e-05, "loss": 1.2834, "step": 18419 }, { "epoch": 0.6596594266478056, "grad_norm": 1.6302517652511597, "learning_rate": 5.484139883025251e-05, "loss": 1.4004, "step": 18420 }, { "epoch": 0.6596952387773739, "grad_norm": 1.5323625802993774, "learning_rate": 5.4831050174682243e-05, "loss": 1.5594, "step": 18421 }, { "epoch": 0.6597310509069422, "grad_norm": 1.9435088634490967, "learning_rate": 5.482070212680201e-05, "loss": 1.5246, "step": 18422 }, { "epoch": 0.6597668630365104, "grad_norm": 2.1833503246307373, "learning_rate": 5.481035468675092e-05, "loss": 1.4532, "step": 18423 }, { "epoch": 0.6598026751660787, "grad_norm": 1.534131646156311, "learning_rate": 5.4800007854668254e-05, "loss": 1.3368, "step": 18424 }, { "epoch": 0.659838487295647, "grad_norm": 2.482542037963867, "learning_rate": 5.478966163069313e-05, "loss": 1.5933, "step": 18425 }, { "epoch": 0.6598742994252154, "grad_norm": 1.8399754762649536, "learning_rate": 5.47793160149648e-05, "loss": 1.5144, "step": 18426 }, { "epoch": 0.6599101115547836, "grad_norm": 1.5900001525878906, "learning_rate": 5.476897100762248e-05, "loss": 1.6037, "step": 18427 }, { "epoch": 0.6599459236843519, "grad_norm": 1.6989408731460571, "learning_rate": 5.475862660880529e-05, "loss": 1.1131, "step": 18428 }, { "epoch": 0.6599817358139202, "grad_norm": 1.6499292850494385, "learning_rate": 5.4748282818652386e-05, "loss": 1.4069, "step": 18429 }, { "epoch": 0.6600175479434884, "grad_norm": 1.6214898824691772, "learning_rate": 5.473793963730299e-05, "loss": 1.3842, "step": 18430 }, { "epoch": 0.6600533600730567, "grad_norm": 1.480055332183838, "learning_rate": 5.4727597064896276e-05, "loss": 1.4681, "step": 18431 }, { "epoch": 0.660089172202625, "grad_norm": 1.5914380550384521, "learning_rate": 5.4717255101571253e-05, "loss": 1.3343, "step": 18432 }, { "epoch": 0.6601249843321934, "grad_norm": 1.7542521953582764, "learning_rate": 5.470691374746724e-05, "loss": 1.5553, "step": 18433 }, { "epoch": 0.6601607964617616, "grad_norm": 2.195375442504883, "learning_rate": 5.469657300272326e-05, "loss": 1.4683, "step": 18434 }, { "epoch": 0.6601966085913299, "grad_norm": 1.4942426681518555, "learning_rate": 5.468623286747844e-05, "loss": 1.2783, "step": 18435 }, { "epoch": 0.6602324207208982, "grad_norm": 1.32637619972229, "learning_rate": 5.4675893341871886e-05, "loss": 1.4105, "step": 18436 }, { "epoch": 0.6602682328504664, "grad_norm": 1.3210185766220093, "learning_rate": 5.4665554426042734e-05, "loss": 1.0158, "step": 18437 }, { "epoch": 0.6603040449800347, "grad_norm": 1.8867077827453613, "learning_rate": 5.465521612013012e-05, "loss": 1.3697, "step": 18438 }, { "epoch": 0.660339857109603, "grad_norm": 1.9586296081542969, "learning_rate": 5.464487842427302e-05, "loss": 1.6983, "step": 18439 }, { "epoch": 0.6603756692391713, "grad_norm": 1.508347511291504, "learning_rate": 5.463454133861059e-05, "loss": 1.2857, "step": 18440 }, { "epoch": 0.6604114813687396, "grad_norm": 1.4944887161254883, "learning_rate": 5.462420486328188e-05, "loss": 1.3681, "step": 18441 }, { "epoch": 0.6604472934983079, "grad_norm": 1.4582613706588745, "learning_rate": 5.461386899842601e-05, "loss": 1.1148, "step": 18442 }, { "epoch": 0.6604831056278762, "grad_norm": 1.7177790403366089, "learning_rate": 5.460353374418195e-05, "loss": 1.6026, "step": 18443 }, { "epoch": 0.6605189177574444, "grad_norm": 1.6896547079086304, "learning_rate": 5.459319910068879e-05, "loss": 1.4609, "step": 18444 }, { "epoch": 0.6605547298870127, "grad_norm": 1.8076750040054321, "learning_rate": 5.4582865068085585e-05, "loss": 1.537, "step": 18445 }, { "epoch": 0.660590542016581, "grad_norm": 1.894033670425415, "learning_rate": 5.4572531646511325e-05, "loss": 1.5241, "step": 18446 }, { "epoch": 0.6606263541461493, "grad_norm": 1.7730125188827515, "learning_rate": 5.456219883610505e-05, "loss": 1.5734, "step": 18447 }, { "epoch": 0.6606621662757176, "grad_norm": 1.4446651935577393, "learning_rate": 5.455186663700578e-05, "loss": 1.6482, "step": 18448 }, { "epoch": 0.6606979784052859, "grad_norm": 1.781532883644104, "learning_rate": 5.4541535049352566e-05, "loss": 1.6462, "step": 18449 }, { "epoch": 0.6607337905348541, "grad_norm": 1.8322112560272217, "learning_rate": 5.4531204073284316e-05, "loss": 1.7429, "step": 18450 }, { "epoch": 0.6607696026644224, "grad_norm": 1.6175395250320435, "learning_rate": 5.4520873708940056e-05, "loss": 1.3589, "step": 18451 }, { "epoch": 0.6608054147939907, "grad_norm": 1.7501140832901, "learning_rate": 5.451054395645883e-05, "loss": 1.6793, "step": 18452 }, { "epoch": 0.660841226923559, "grad_norm": 1.5763481855392456, "learning_rate": 5.450021481597951e-05, "loss": 1.3531, "step": 18453 }, { "epoch": 0.6608770390531273, "grad_norm": 1.719218134880066, "learning_rate": 5.448988628764111e-05, "loss": 1.3094, "step": 18454 }, { "epoch": 0.6609128511826956, "grad_norm": 1.532935619354248, "learning_rate": 5.4479558371582584e-05, "loss": 1.3702, "step": 18455 }, { "epoch": 0.6609486633122639, "grad_norm": 1.7714307308197021, "learning_rate": 5.446923106794293e-05, "loss": 1.5029, "step": 18456 }, { "epoch": 0.6609844754418321, "grad_norm": 1.7432639598846436, "learning_rate": 5.4458904376860997e-05, "loss": 1.5302, "step": 18457 }, { "epoch": 0.6610202875714004, "grad_norm": 1.608049750328064, "learning_rate": 5.444857829847576e-05, "loss": 1.1499, "step": 18458 }, { "epoch": 0.6610560997009687, "grad_norm": 1.6713452339172363, "learning_rate": 5.443825283292615e-05, "loss": 1.4979, "step": 18459 }, { "epoch": 0.661091911830537, "grad_norm": 1.5441710948944092, "learning_rate": 5.4427927980351124e-05, "loss": 1.309, "step": 18460 }, { "epoch": 0.6611277239601053, "grad_norm": 1.8041974306106567, "learning_rate": 5.441760374088949e-05, "loss": 1.2219, "step": 18461 }, { "epoch": 0.6611635360896736, "grad_norm": 1.397838830947876, "learning_rate": 5.4407280114680206e-05, "loss": 1.2665, "step": 18462 }, { "epoch": 0.6611993482192419, "grad_norm": 1.60728919506073, "learning_rate": 5.439695710186219e-05, "loss": 1.5308, "step": 18463 }, { "epoch": 0.6612351603488101, "grad_norm": 1.4930665493011475, "learning_rate": 5.4386634702574255e-05, "loss": 1.5292, "step": 18464 }, { "epoch": 0.6612709724783784, "grad_norm": 1.4357012510299683, "learning_rate": 5.437631291695533e-05, "loss": 1.4713, "step": 18465 }, { "epoch": 0.6613067846079467, "grad_norm": 1.4895292520523071, "learning_rate": 5.436599174514425e-05, "loss": 1.5546, "step": 18466 }, { "epoch": 0.6613425967375149, "grad_norm": 1.6815274953842163, "learning_rate": 5.435567118727993e-05, "loss": 1.2031, "step": 18467 }, { "epoch": 0.6613784088670833, "grad_norm": 1.4173117876052856, "learning_rate": 5.434535124350113e-05, "loss": 1.6375, "step": 18468 }, { "epoch": 0.6614142209966516, "grad_norm": 1.7272834777832031, "learning_rate": 5.433503191394675e-05, "loss": 1.4441, "step": 18469 }, { "epoch": 0.6614500331262199, "grad_norm": 1.6564527750015259, "learning_rate": 5.432471319875565e-05, "loss": 1.4019, "step": 18470 }, { "epoch": 0.6614858452557881, "grad_norm": 1.524398922920227, "learning_rate": 5.431439509806657e-05, "loss": 1.3863, "step": 18471 }, { "epoch": 0.6615216573853564, "grad_norm": 1.881738543510437, "learning_rate": 5.4304077612018375e-05, "loss": 1.5866, "step": 18472 }, { "epoch": 0.6615574695149247, "grad_norm": 1.785723328590393, "learning_rate": 5.429376074074988e-05, "loss": 1.595, "step": 18473 }, { "epoch": 0.6615932816444929, "grad_norm": 1.6554456949234009, "learning_rate": 5.4283444484399904e-05, "loss": 1.2719, "step": 18474 }, { "epoch": 0.6616290937740613, "grad_norm": 2.057368516921997, "learning_rate": 5.427312884310718e-05, "loss": 1.5038, "step": 18475 }, { "epoch": 0.6616649059036296, "grad_norm": 1.655664086341858, "learning_rate": 5.426281381701053e-05, "loss": 1.4807, "step": 18476 }, { "epoch": 0.6617007180331979, "grad_norm": 1.4010266065597534, "learning_rate": 5.4252499406248724e-05, "loss": 1.34, "step": 18477 }, { "epoch": 0.6617365301627661, "grad_norm": 1.8840229511260986, "learning_rate": 5.424218561096055e-05, "loss": 1.7182, "step": 18478 }, { "epoch": 0.6617723422923344, "grad_norm": 1.9274563789367676, "learning_rate": 5.423187243128472e-05, "loss": 1.5018, "step": 18479 }, { "epoch": 0.6618081544219027, "grad_norm": 1.7007853984832764, "learning_rate": 5.4221559867360014e-05, "loss": 1.5183, "step": 18480 }, { "epoch": 0.6618439665514709, "grad_norm": 1.4750630855560303, "learning_rate": 5.4211247919325206e-05, "loss": 1.5261, "step": 18481 }, { "epoch": 0.6618797786810393, "grad_norm": 1.4993181228637695, "learning_rate": 5.4200936587318954e-05, "loss": 1.4829, "step": 18482 }, { "epoch": 0.6619155908106076, "grad_norm": 2.4123010635375977, "learning_rate": 5.4190625871480016e-05, "loss": 1.646, "step": 18483 }, { "epoch": 0.6619514029401758, "grad_norm": 1.5104199647903442, "learning_rate": 5.4180315771947123e-05, "loss": 1.4037, "step": 18484 }, { "epoch": 0.6619872150697441, "grad_norm": 1.7275927066802979, "learning_rate": 5.417000628885902e-05, "loss": 1.6069, "step": 18485 }, { "epoch": 0.6620230271993124, "grad_norm": 1.8526519536972046, "learning_rate": 5.415969742235432e-05, "loss": 1.7501, "step": 18486 }, { "epoch": 0.6620588393288807, "grad_norm": 2.1937081813812256, "learning_rate": 5.414938917257177e-05, "loss": 1.4062, "step": 18487 }, { "epoch": 0.6620946514584489, "grad_norm": 1.4393888711929321, "learning_rate": 5.4139081539650084e-05, "loss": 1.3466, "step": 18488 }, { "epoch": 0.6621304635880173, "grad_norm": 1.2877326011657715, "learning_rate": 5.412877452372784e-05, "loss": 1.4105, "step": 18489 }, { "epoch": 0.6621662757175856, "grad_norm": 1.5457091331481934, "learning_rate": 5.411846812494379e-05, "loss": 1.4512, "step": 18490 }, { "epoch": 0.6622020878471538, "grad_norm": 2.318279266357422, "learning_rate": 5.410816234343656e-05, "loss": 1.4582, "step": 18491 }, { "epoch": 0.6622378999767221, "grad_norm": 2.045260429382324, "learning_rate": 5.4097857179344846e-05, "loss": 1.4889, "step": 18492 }, { "epoch": 0.6622737121062904, "grad_norm": 2.0480244159698486, "learning_rate": 5.4087552632807225e-05, "loss": 1.3724, "step": 18493 }, { "epoch": 0.6623095242358586, "grad_norm": 1.5088456869125366, "learning_rate": 5.407724870396235e-05, "loss": 1.4677, "step": 18494 }, { "epoch": 0.6623453363654269, "grad_norm": 1.4615206718444824, "learning_rate": 5.4066945392948896e-05, "loss": 1.0757, "step": 18495 }, { "epoch": 0.6623811484949953, "grad_norm": 1.892917275428772, "learning_rate": 5.40566426999054e-05, "loss": 1.5292, "step": 18496 }, { "epoch": 0.6624169606245636, "grad_norm": 1.6644346714019775, "learning_rate": 5.404634062497057e-05, "loss": 1.5624, "step": 18497 }, { "epoch": 0.6624527727541318, "grad_norm": 2.008518695831299, "learning_rate": 5.403603916828286e-05, "loss": 1.6561, "step": 18498 }, { "epoch": 0.6624885848837001, "grad_norm": 1.7898550033569336, "learning_rate": 5.4025738329981035e-05, "loss": 1.4068, "step": 18499 }, { "epoch": 0.6625243970132684, "grad_norm": 1.7520909309387207, "learning_rate": 5.401543811020356e-05, "loss": 1.2904, "step": 18500 }, { "epoch": 0.6625602091428366, "grad_norm": 1.633162021636963, "learning_rate": 5.400513850908905e-05, "loss": 1.3964, "step": 18501 }, { "epoch": 0.6625960212724049, "grad_norm": 2.3457908630371094, "learning_rate": 5.3994839526776065e-05, "loss": 1.8168, "step": 18502 }, { "epoch": 0.6626318334019733, "grad_norm": 1.3527460098266602, "learning_rate": 5.398454116340322e-05, "loss": 1.3832, "step": 18503 }, { "epoch": 0.6626676455315416, "grad_norm": 1.643717646598816, "learning_rate": 5.3974243419109016e-05, "loss": 1.1241, "step": 18504 }, { "epoch": 0.6627034576611098, "grad_norm": 1.5871790647506714, "learning_rate": 5.396394629403192e-05, "loss": 1.5845, "step": 18505 }, { "epoch": 0.6627392697906781, "grad_norm": 1.3667486906051636, "learning_rate": 5.395364978831061e-05, "loss": 1.5827, "step": 18506 }, { "epoch": 0.6627750819202464, "grad_norm": 1.8814173936843872, "learning_rate": 5.394335390208352e-05, "loss": 1.1862, "step": 18507 }, { "epoch": 0.6628108940498146, "grad_norm": 1.5491501092910767, "learning_rate": 5.393305863548924e-05, "loss": 1.4095, "step": 18508 }, { "epoch": 0.6628467061793829, "grad_norm": 1.667093276977539, "learning_rate": 5.392276398866615e-05, "loss": 1.4577, "step": 18509 }, { "epoch": 0.6628825183089513, "grad_norm": 1.9760993719100952, "learning_rate": 5.391246996175291e-05, "loss": 1.5, "step": 18510 }, { "epoch": 0.6629183304385196, "grad_norm": 2.045569658279419, "learning_rate": 5.39021765548879e-05, "loss": 1.6401, "step": 18511 }, { "epoch": 0.6629541425680878, "grad_norm": 1.5511012077331543, "learning_rate": 5.3891883768209686e-05, "loss": 1.3945, "step": 18512 }, { "epoch": 0.6629899546976561, "grad_norm": 1.8980215787887573, "learning_rate": 5.388159160185665e-05, "loss": 1.3399, "step": 18513 }, { "epoch": 0.6630257668272244, "grad_norm": 1.6040958166122437, "learning_rate": 5.387130005596732e-05, "loss": 1.5281, "step": 18514 }, { "epoch": 0.6630615789567926, "grad_norm": 2.547346830368042, "learning_rate": 5.386100913068017e-05, "loss": 1.5416, "step": 18515 }, { "epoch": 0.6630973910863609, "grad_norm": 1.6553081274032593, "learning_rate": 5.385071882613357e-05, "loss": 1.2632, "step": 18516 }, { "epoch": 0.6631332032159293, "grad_norm": 1.6644037961959839, "learning_rate": 5.3840429142466096e-05, "loss": 1.786, "step": 18517 }, { "epoch": 0.6631690153454975, "grad_norm": 1.7519235610961914, "learning_rate": 5.383014007981606e-05, "loss": 1.5851, "step": 18518 }, { "epoch": 0.6632048274750658, "grad_norm": 1.6565033197402954, "learning_rate": 5.381985163832197e-05, "loss": 1.5017, "step": 18519 }, { "epoch": 0.6632406396046341, "grad_norm": 1.5082019567489624, "learning_rate": 5.380956381812213e-05, "loss": 1.5402, "step": 18520 }, { "epoch": 0.6632764517342024, "grad_norm": 1.5436220169067383, "learning_rate": 5.379927661935511e-05, "loss": 1.2787, "step": 18521 }, { "epoch": 0.6633122638637706, "grad_norm": 1.599420189857483, "learning_rate": 5.3788990042159224e-05, "loss": 1.3577, "step": 18522 }, { "epoch": 0.6633480759933389, "grad_norm": 1.4425898790359497, "learning_rate": 5.377870408667285e-05, "loss": 1.3236, "step": 18523 }, { "epoch": 0.6633838881229073, "grad_norm": 1.4394543170928955, "learning_rate": 5.3768418753034375e-05, "loss": 1.5358, "step": 18524 }, { "epoch": 0.6634197002524755, "grad_norm": 1.8449910879135132, "learning_rate": 5.375813404138219e-05, "loss": 1.5375, "step": 18525 }, { "epoch": 0.6634555123820438, "grad_norm": 1.8355271816253662, "learning_rate": 5.37478499518547e-05, "loss": 1.309, "step": 18526 }, { "epoch": 0.6634913245116121, "grad_norm": 1.466902256011963, "learning_rate": 5.3737566484590164e-05, "loss": 1.3721, "step": 18527 }, { "epoch": 0.6635271366411803, "grad_norm": 1.5819684267044067, "learning_rate": 5.372728363972706e-05, "loss": 1.4732, "step": 18528 }, { "epoch": 0.6635629487707486, "grad_norm": 1.633408546447754, "learning_rate": 5.371700141740364e-05, "loss": 1.9911, "step": 18529 }, { "epoch": 0.6635987609003169, "grad_norm": 1.6528607606887817, "learning_rate": 5.3706719817758286e-05, "loss": 1.5908, "step": 18530 }, { "epoch": 0.6636345730298853, "grad_norm": 2.3122167587280273, "learning_rate": 5.3696438840929276e-05, "loss": 1.552, "step": 18531 }, { "epoch": 0.6636703851594535, "grad_norm": 1.6594113111495972, "learning_rate": 5.368615848705496e-05, "loss": 1.7639, "step": 18532 }, { "epoch": 0.6637061972890218, "grad_norm": 1.49611496925354, "learning_rate": 5.367587875627367e-05, "loss": 1.2976, "step": 18533 }, { "epoch": 0.6637420094185901, "grad_norm": 1.8043385744094849, "learning_rate": 5.366559964872364e-05, "loss": 1.3638, "step": 18534 }, { "epoch": 0.6637778215481583, "grad_norm": 1.6629353761672974, "learning_rate": 5.36553211645432e-05, "loss": 1.504, "step": 18535 }, { "epoch": 0.6638136336777266, "grad_norm": 2.4057998657226562, "learning_rate": 5.3645043303870634e-05, "loss": 1.3973, "step": 18536 }, { "epoch": 0.6638494458072949, "grad_norm": 1.4145482778549194, "learning_rate": 5.363476606684425e-05, "loss": 1.411, "step": 18537 }, { "epoch": 0.6638852579368633, "grad_norm": 1.9292107820510864, "learning_rate": 5.3624489453602255e-05, "loss": 1.4168, "step": 18538 }, { "epoch": 0.6639210700664315, "grad_norm": 2.106179714202881, "learning_rate": 5.361421346428294e-05, "loss": 1.4649, "step": 18539 }, { "epoch": 0.6639568821959998, "grad_norm": 1.8230222463607788, "learning_rate": 5.3603938099024576e-05, "loss": 1.3864, "step": 18540 }, { "epoch": 0.6639926943255681, "grad_norm": 1.3416988849639893, "learning_rate": 5.359366335796534e-05, "loss": 1.3887, "step": 18541 }, { "epoch": 0.6640285064551363, "grad_norm": 1.5865682363510132, "learning_rate": 5.35833892412435e-05, "loss": 1.6314, "step": 18542 }, { "epoch": 0.6640643185847046, "grad_norm": 1.7829359769821167, "learning_rate": 5.3573115748997284e-05, "loss": 1.3659, "step": 18543 }, { "epoch": 0.6641001307142729, "grad_norm": 1.5039947032928467, "learning_rate": 5.356284288136496e-05, "loss": 1.2948, "step": 18544 }, { "epoch": 0.6641359428438413, "grad_norm": 1.5445626974105835, "learning_rate": 5.3552570638484644e-05, "loss": 1.2314, "step": 18545 }, { "epoch": 0.6641717549734095, "grad_norm": 1.75049889087677, "learning_rate": 5.3542299020494567e-05, "loss": 1.4201, "step": 18546 }, { "epoch": 0.6642075671029778, "grad_norm": 1.5456581115722656, "learning_rate": 5.3532028027532947e-05, "loss": 1.6774, "step": 18547 }, { "epoch": 0.6642433792325461, "grad_norm": 1.814924955368042, "learning_rate": 5.352175765973797e-05, "loss": 1.4873, "step": 18548 }, { "epoch": 0.6642791913621143, "grad_norm": 1.8714579343795776, "learning_rate": 5.351148791724776e-05, "loss": 1.3834, "step": 18549 }, { "epoch": 0.6643150034916826, "grad_norm": 1.5379172563552856, "learning_rate": 5.3501218800200514e-05, "loss": 1.426, "step": 18550 }, { "epoch": 0.6643508156212509, "grad_norm": 1.540379524230957, "learning_rate": 5.349095030873443e-05, "loss": 1.4127, "step": 18551 }, { "epoch": 0.6643866277508192, "grad_norm": 1.8711189031600952, "learning_rate": 5.348068244298758e-05, "loss": 1.3488, "step": 18552 }, { "epoch": 0.6644224398803875, "grad_norm": 1.440477728843689, "learning_rate": 5.347041520309815e-05, "loss": 1.506, "step": 18553 }, { "epoch": 0.6644582520099558, "grad_norm": 1.2828096151351929, "learning_rate": 5.346014858920425e-05, "loss": 1.4859, "step": 18554 }, { "epoch": 0.664494064139524, "grad_norm": 1.695265769958496, "learning_rate": 5.3449882601444054e-05, "loss": 1.2407, "step": 18555 }, { "epoch": 0.6645298762690923, "grad_norm": 2.340801239013672, "learning_rate": 5.343961723995561e-05, "loss": 1.5744, "step": 18556 }, { "epoch": 0.6645656883986606, "grad_norm": 1.5414936542510986, "learning_rate": 5.342935250487706e-05, "loss": 1.3723, "step": 18557 }, { "epoch": 0.6646015005282289, "grad_norm": 1.4003174304962158, "learning_rate": 5.341908839634654e-05, "loss": 1.6129, "step": 18558 }, { "epoch": 0.6646373126577972, "grad_norm": 1.514754295349121, "learning_rate": 5.340882491450205e-05, "loss": 1.3989, "step": 18559 }, { "epoch": 0.6646731247873655, "grad_norm": 1.9830070734024048, "learning_rate": 5.339856205948175e-05, "loss": 1.7887, "step": 18560 }, { "epoch": 0.6647089369169338, "grad_norm": 1.8465994596481323, "learning_rate": 5.338829983142366e-05, "loss": 1.9694, "step": 18561 }, { "epoch": 0.664744749046502, "grad_norm": 1.5064421892166138, "learning_rate": 5.337803823046592e-05, "loss": 1.391, "step": 18562 }, { "epoch": 0.6647805611760703, "grad_norm": 1.7226872444152832, "learning_rate": 5.33677772567465e-05, "loss": 1.4824, "step": 18563 }, { "epoch": 0.6648163733056386, "grad_norm": 2.0793983936309814, "learning_rate": 5.335751691040348e-05, "loss": 1.7713, "step": 18564 }, { "epoch": 0.6648521854352069, "grad_norm": 1.8134207725524902, "learning_rate": 5.334725719157492e-05, "loss": 1.3679, "step": 18565 }, { "epoch": 0.6648879975647752, "grad_norm": 2.0199649333953857, "learning_rate": 5.333699810039885e-05, "loss": 1.5586, "step": 18566 }, { "epoch": 0.6649238096943435, "grad_norm": 1.5236722230911255, "learning_rate": 5.3326739637013255e-05, "loss": 1.5598, "step": 18567 }, { "epoch": 0.6649596218239118, "grad_norm": 1.9306199550628662, "learning_rate": 5.3316481801556173e-05, "loss": 1.6249, "step": 18568 }, { "epoch": 0.66499543395348, "grad_norm": 1.7623952627182007, "learning_rate": 5.3306224594165654e-05, "loss": 1.5419, "step": 18569 }, { "epoch": 0.6650312460830483, "grad_norm": 1.5631310939788818, "learning_rate": 5.3295968014979613e-05, "loss": 1.6085, "step": 18570 }, { "epoch": 0.6650670582126166, "grad_norm": 1.6538745164871216, "learning_rate": 5.328571206413607e-05, "loss": 1.1281, "step": 18571 }, { "epoch": 0.6651028703421848, "grad_norm": 1.778361439704895, "learning_rate": 5.3275456741773025e-05, "loss": 1.6629, "step": 18572 }, { "epoch": 0.6651386824717532, "grad_norm": 1.872376561164856, "learning_rate": 5.3265202048028474e-05, "loss": 1.7646, "step": 18573 }, { "epoch": 0.6651744946013215, "grad_norm": 1.703460693359375, "learning_rate": 5.32549479830403e-05, "loss": 1.2561, "step": 18574 }, { "epoch": 0.6652103067308898, "grad_norm": 1.7537646293640137, "learning_rate": 5.324469454694651e-05, "loss": 1.3299, "step": 18575 }, { "epoch": 0.665246118860458, "grad_norm": 1.5586273670196533, "learning_rate": 5.323444173988509e-05, "loss": 1.397, "step": 18576 }, { "epoch": 0.6652819309900263, "grad_norm": 1.939870834350586, "learning_rate": 5.3224189561993886e-05, "loss": 1.5419, "step": 18577 }, { "epoch": 0.6653177431195946, "grad_norm": 1.8316444158554077, "learning_rate": 5.321393801341088e-05, "loss": 1.1162, "step": 18578 }, { "epoch": 0.6653535552491628, "grad_norm": 1.9167686700820923, "learning_rate": 5.320368709427399e-05, "loss": 1.6643, "step": 18579 }, { "epoch": 0.6653893673787312, "grad_norm": 1.688992977142334, "learning_rate": 5.3193436804721154e-05, "loss": 1.4744, "step": 18580 }, { "epoch": 0.6654251795082995, "grad_norm": 1.4735733270645142, "learning_rate": 5.318318714489021e-05, "loss": 1.5833, "step": 18581 }, { "epoch": 0.6654609916378678, "grad_norm": 1.6011989116668701, "learning_rate": 5.317293811491911e-05, "loss": 1.3692, "step": 18582 }, { "epoch": 0.665496803767436, "grad_norm": 1.7354061603546143, "learning_rate": 5.316268971494571e-05, "loss": 1.3453, "step": 18583 }, { "epoch": 0.6655326158970043, "grad_norm": 1.5282679796218872, "learning_rate": 5.315244194510795e-05, "loss": 1.5145, "step": 18584 }, { "epoch": 0.6655684280265726, "grad_norm": 1.8057043552398682, "learning_rate": 5.3142194805543625e-05, "loss": 1.2936, "step": 18585 }, { "epoch": 0.6656042401561408, "grad_norm": 1.4133145809173584, "learning_rate": 5.313194829639061e-05, "loss": 1.4274, "step": 18586 }, { "epoch": 0.6656400522857092, "grad_norm": 1.4651211500167847, "learning_rate": 5.312170241778682e-05, "loss": 1.5827, "step": 18587 }, { "epoch": 0.6656758644152775, "grad_norm": 1.4076982736587524, "learning_rate": 5.311145716987003e-05, "loss": 1.4423, "step": 18588 }, { "epoch": 0.6657116765448458, "grad_norm": 1.5630006790161133, "learning_rate": 5.310121255277809e-05, "loss": 1.2223, "step": 18589 }, { "epoch": 0.665747488674414, "grad_norm": 1.5476254224777222, "learning_rate": 5.3090968566648836e-05, "loss": 1.3305, "step": 18590 }, { "epoch": 0.6657833008039823, "grad_norm": 1.4601191282272339, "learning_rate": 5.308072521162013e-05, "loss": 1.3458, "step": 18591 }, { "epoch": 0.6658191129335506, "grad_norm": 1.768479347229004, "learning_rate": 5.307048248782975e-05, "loss": 1.4372, "step": 18592 }, { "epoch": 0.6658549250631188, "grad_norm": 1.4947692155838013, "learning_rate": 5.306024039541542e-05, "loss": 1.4677, "step": 18593 }, { "epoch": 0.6658907371926872, "grad_norm": 1.4802711009979248, "learning_rate": 5.3049998934515076e-05, "loss": 1.3701, "step": 18594 }, { "epoch": 0.6659265493222555, "grad_norm": 1.391737461090088, "learning_rate": 5.30397581052664e-05, "loss": 1.2477, "step": 18595 }, { "epoch": 0.6659623614518237, "grad_norm": 2.0155186653137207, "learning_rate": 5.302951790780725e-05, "loss": 1.8189, "step": 18596 }, { "epoch": 0.665998173581392, "grad_norm": 1.9427070617675781, "learning_rate": 5.3019278342275256e-05, "loss": 1.421, "step": 18597 }, { "epoch": 0.6660339857109603, "grad_norm": 1.6364455223083496, "learning_rate": 5.300903940880837e-05, "loss": 1.5302, "step": 18598 }, { "epoch": 0.6660697978405286, "grad_norm": 1.3640127182006836, "learning_rate": 5.299880110754418e-05, "loss": 1.4539, "step": 18599 }, { "epoch": 0.6661056099700968, "grad_norm": 1.8149890899658203, "learning_rate": 5.298856343862051e-05, "loss": 1.3804, "step": 18600 }, { "epoch": 0.6661414220996652, "grad_norm": 2.145496368408203, "learning_rate": 5.2978326402175125e-05, "loss": 1.3423, "step": 18601 }, { "epoch": 0.6661772342292335, "grad_norm": 1.472667932510376, "learning_rate": 5.296808999834565e-05, "loss": 1.0807, "step": 18602 }, { "epoch": 0.6662130463588017, "grad_norm": 2.075178861618042, "learning_rate": 5.295785422726991e-05, "loss": 1.5908, "step": 18603 }, { "epoch": 0.66624885848837, "grad_norm": 1.5914913415908813, "learning_rate": 5.2947619089085463e-05, "loss": 1.2944, "step": 18604 }, { "epoch": 0.6662846706179383, "grad_norm": 1.5347884893417358, "learning_rate": 5.2937384583930204e-05, "loss": 1.5397, "step": 18605 }, { "epoch": 0.6663204827475065, "grad_norm": 1.6247977018356323, "learning_rate": 5.2927150711941675e-05, "loss": 1.1967, "step": 18606 }, { "epoch": 0.6663562948770748, "grad_norm": 1.4314088821411133, "learning_rate": 5.2916917473257665e-05, "loss": 1.5123, "step": 18607 }, { "epoch": 0.6663921070066432, "grad_norm": 1.2514408826828003, "learning_rate": 5.2906684868015724e-05, "loss": 1.664, "step": 18608 }, { "epoch": 0.6664279191362115, "grad_norm": 1.1693495512008667, "learning_rate": 5.2896452896353656e-05, "loss": 1.2389, "step": 18609 }, { "epoch": 0.6664637312657797, "grad_norm": 2.151035785675049, "learning_rate": 5.2886221558409065e-05, "loss": 1.4911, "step": 18610 }, { "epoch": 0.666499543395348, "grad_norm": 1.815909743309021, "learning_rate": 5.287599085431951e-05, "loss": 1.7131, "step": 18611 }, { "epoch": 0.6665353555249163, "grad_norm": 1.4415584802627563, "learning_rate": 5.2865760784222786e-05, "loss": 1.3634, "step": 18612 }, { "epoch": 0.6665711676544845, "grad_norm": 2.0365536212921143, "learning_rate": 5.2855531348256424e-05, "loss": 1.6613, "step": 18613 }, { "epoch": 0.6666069797840528, "grad_norm": 1.3029762506484985, "learning_rate": 5.2845302546558105e-05, "loss": 1.4112, "step": 18614 }, { "epoch": 0.6666427919136212, "grad_norm": 2.367351770401001, "learning_rate": 5.283507437926534e-05, "loss": 1.5832, "step": 18615 }, { "epoch": 0.6666786040431895, "grad_norm": 1.4028692245483398, "learning_rate": 5.2824846846515886e-05, "loss": 1.381, "step": 18616 }, { "epoch": 0.6667144161727577, "grad_norm": 1.3128471374511719, "learning_rate": 5.281461994844723e-05, "loss": 1.2977, "step": 18617 }, { "epoch": 0.666750228302326, "grad_norm": 1.9322855472564697, "learning_rate": 5.280439368519703e-05, "loss": 1.5668, "step": 18618 }, { "epoch": 0.6667860404318943, "grad_norm": 2.173131227493286, "learning_rate": 5.27941680569028e-05, "loss": 1.5765, "step": 18619 }, { "epoch": 0.6668218525614625, "grad_norm": 1.7341821193695068, "learning_rate": 5.2783943063702155e-05, "loss": 1.6393, "step": 18620 }, { "epoch": 0.6668576646910308, "grad_norm": 2.0364861488342285, "learning_rate": 5.277371870573269e-05, "loss": 1.6499, "step": 18621 }, { "epoch": 0.6668934768205992, "grad_norm": 1.3587638139724731, "learning_rate": 5.276349498313188e-05, "loss": 1.3826, "step": 18622 }, { "epoch": 0.6669292889501675, "grad_norm": 1.7243382930755615, "learning_rate": 5.2753271896037316e-05, "loss": 1.3895, "step": 18623 }, { "epoch": 0.6669651010797357, "grad_norm": 1.643670916557312, "learning_rate": 5.274304944458652e-05, "loss": 1.5037, "step": 18624 }, { "epoch": 0.667000913209304, "grad_norm": 2.013648509979248, "learning_rate": 5.273282762891709e-05, "loss": 1.341, "step": 18625 }, { "epoch": 0.6670367253388723, "grad_norm": 1.607382893562317, "learning_rate": 5.2722606449166426e-05, "loss": 1.2063, "step": 18626 }, { "epoch": 0.6670725374684405, "grad_norm": 1.6168111562728882, "learning_rate": 5.271238590547216e-05, "loss": 1.5996, "step": 18627 }, { "epoch": 0.6671083495980088, "grad_norm": 1.732462763786316, "learning_rate": 5.270216599797176e-05, "loss": 1.466, "step": 18628 }, { "epoch": 0.6671441617275772, "grad_norm": 1.7212103605270386, "learning_rate": 5.269194672680267e-05, "loss": 1.5072, "step": 18629 }, { "epoch": 0.6671799738571454, "grad_norm": 1.5116984844207764, "learning_rate": 5.268172809210241e-05, "loss": 1.268, "step": 18630 }, { "epoch": 0.6672157859867137, "grad_norm": 1.6461093425750732, "learning_rate": 5.267151009400846e-05, "loss": 1.543, "step": 18631 }, { "epoch": 0.667251598116282, "grad_norm": 1.5249959230422974, "learning_rate": 5.266129273265834e-05, "loss": 1.5185, "step": 18632 }, { "epoch": 0.6672874102458503, "grad_norm": 1.881767749786377, "learning_rate": 5.2651076008189415e-05, "loss": 1.3663, "step": 18633 }, { "epoch": 0.6673232223754185, "grad_norm": 1.7331863641738892, "learning_rate": 5.2640859920739194e-05, "loss": 1.5388, "step": 18634 }, { "epoch": 0.6673590345049868, "grad_norm": 1.5982319116592407, "learning_rate": 5.263064447044511e-05, "loss": 1.6447, "step": 18635 }, { "epoch": 0.6673948466345552, "grad_norm": 1.6819671392440796, "learning_rate": 5.262042965744465e-05, "loss": 1.4766, "step": 18636 }, { "epoch": 0.6674306587641234, "grad_norm": 2.2758350372314453, "learning_rate": 5.261021548187515e-05, "loss": 1.1752, "step": 18637 }, { "epoch": 0.6674664708936917, "grad_norm": 1.5425456762313843, "learning_rate": 5.260000194387407e-05, "loss": 1.1864, "step": 18638 }, { "epoch": 0.66750228302326, "grad_norm": 1.5215123891830444, "learning_rate": 5.2589789043578855e-05, "loss": 1.2848, "step": 18639 }, { "epoch": 0.6675380951528282, "grad_norm": 1.7015901803970337, "learning_rate": 5.257957678112684e-05, "loss": 1.3467, "step": 18640 }, { "epoch": 0.6675739072823965, "grad_norm": 1.6739884614944458, "learning_rate": 5.2569365156655446e-05, "loss": 1.7775, "step": 18641 }, { "epoch": 0.6676097194119648, "grad_norm": 1.2966762781143188, "learning_rate": 5.255915417030206e-05, "loss": 1.268, "step": 18642 }, { "epoch": 0.6676455315415332, "grad_norm": 1.2526237964630127, "learning_rate": 5.254894382220412e-05, "loss": 1.3321, "step": 18643 }, { "epoch": 0.6676813436711014, "grad_norm": 1.7547239065170288, "learning_rate": 5.2538734112498876e-05, "loss": 1.5343, "step": 18644 }, { "epoch": 0.6677171558006697, "grad_norm": 1.8325891494750977, "learning_rate": 5.252852504132375e-05, "loss": 1.3332, "step": 18645 }, { "epoch": 0.667752967930238, "grad_norm": 2.117971658706665, "learning_rate": 5.251831660881612e-05, "loss": 1.454, "step": 18646 }, { "epoch": 0.6677887800598062, "grad_norm": 1.859230875968933, "learning_rate": 5.2508108815113264e-05, "loss": 1.4038, "step": 18647 }, { "epoch": 0.6678245921893745, "grad_norm": 2.172140121459961, "learning_rate": 5.249790166035253e-05, "loss": 1.1993, "step": 18648 }, { "epoch": 0.6678604043189428, "grad_norm": 1.781510353088379, "learning_rate": 5.2487695144671264e-05, "loss": 1.5604, "step": 18649 }, { "epoch": 0.6678962164485112, "grad_norm": 1.547957181930542, "learning_rate": 5.247748926820683e-05, "loss": 1.305, "step": 18650 }, { "epoch": 0.6679320285780794, "grad_norm": 1.5986849069595337, "learning_rate": 5.246728403109642e-05, "loss": 1.8007, "step": 18651 }, { "epoch": 0.6679678407076477, "grad_norm": 1.7270187139511108, "learning_rate": 5.245707943347738e-05, "loss": 1.1008, "step": 18652 }, { "epoch": 0.668003652837216, "grad_norm": 1.8409010171890259, "learning_rate": 5.244687547548703e-05, "loss": 1.206, "step": 18653 }, { "epoch": 0.6680394649667842, "grad_norm": 1.8632875680923462, "learning_rate": 5.243667215726267e-05, "loss": 1.5249, "step": 18654 }, { "epoch": 0.6680752770963525, "grad_norm": 1.8973302841186523, "learning_rate": 5.242646947894148e-05, "loss": 1.3558, "step": 18655 }, { "epoch": 0.6681110892259208, "grad_norm": 1.471661925315857, "learning_rate": 5.241626744066079e-05, "loss": 1.5711, "step": 18656 }, { "epoch": 0.6681469013554892, "grad_norm": 1.3317395448684692, "learning_rate": 5.240606604255787e-05, "loss": 1.1836, "step": 18657 }, { "epoch": 0.6681827134850574, "grad_norm": 1.742253065109253, "learning_rate": 5.239586528476992e-05, "loss": 1.5158, "step": 18658 }, { "epoch": 0.6682185256146257, "grad_norm": 1.6909421682357788, "learning_rate": 5.2385665167434175e-05, "loss": 1.2893, "step": 18659 }, { "epoch": 0.668254337744194, "grad_norm": 1.7493952512741089, "learning_rate": 5.2375465690687895e-05, "loss": 1.4646, "step": 18660 }, { "epoch": 0.6682901498737622, "grad_norm": 1.4725412130355835, "learning_rate": 5.236526685466834e-05, "loss": 1.5871, "step": 18661 }, { "epoch": 0.6683259620033305, "grad_norm": 1.6244035959243774, "learning_rate": 5.235506865951263e-05, "loss": 1.4979, "step": 18662 }, { "epoch": 0.6683617741328988, "grad_norm": 1.7314326763153076, "learning_rate": 5.234487110535802e-05, "loss": 1.364, "step": 18663 }, { "epoch": 0.6683975862624671, "grad_norm": 1.394898772239685, "learning_rate": 5.233467419234173e-05, "loss": 1.3766, "step": 18664 }, { "epoch": 0.6684333983920354, "grad_norm": 1.64047110080719, "learning_rate": 5.2324477920600876e-05, "loss": 1.3212, "step": 18665 }, { "epoch": 0.6684692105216037, "grad_norm": 1.883249044418335, "learning_rate": 5.231428229027269e-05, "loss": 1.3998, "step": 18666 }, { "epoch": 0.668505022651172, "grad_norm": 1.5769582986831665, "learning_rate": 5.23040873014943e-05, "loss": 1.3623, "step": 18667 }, { "epoch": 0.6685408347807402, "grad_norm": 1.6904163360595703, "learning_rate": 5.229389295440295e-05, "loss": 1.5583, "step": 18668 }, { "epoch": 0.6685766469103085, "grad_norm": 1.9609761238098145, "learning_rate": 5.228369924913567e-05, "loss": 1.2652, "step": 18669 }, { "epoch": 0.6686124590398768, "grad_norm": 1.8444284200668335, "learning_rate": 5.22735061858297e-05, "loss": 1.3636, "step": 18670 }, { "epoch": 0.6686482711694451, "grad_norm": 1.3868964910507202, "learning_rate": 5.2263313764622124e-05, "loss": 1.17, "step": 18671 }, { "epoch": 0.6686840832990134, "grad_norm": 1.3549922704696655, "learning_rate": 5.225312198565013e-05, "loss": 1.4785, "step": 18672 }, { "epoch": 0.6687198954285817, "grad_norm": 1.5612128973007202, "learning_rate": 5.224293084905074e-05, "loss": 1.3645, "step": 18673 }, { "epoch": 0.66875570755815, "grad_norm": 1.5936546325683594, "learning_rate": 5.223274035496113e-05, "loss": 1.5845, "step": 18674 }, { "epoch": 0.6687915196877182, "grad_norm": 1.3002891540527344, "learning_rate": 5.222255050351841e-05, "loss": 1.4866, "step": 18675 }, { "epoch": 0.6688273318172865, "grad_norm": 1.6563246250152588, "learning_rate": 5.221236129485961e-05, "loss": 1.1432, "step": 18676 }, { "epoch": 0.6688631439468548, "grad_norm": 1.0846136808395386, "learning_rate": 5.2202172729121844e-05, "loss": 1.3841, "step": 18677 }, { "epoch": 0.6688989560764231, "grad_norm": 1.6632499694824219, "learning_rate": 5.219198480644221e-05, "loss": 1.7012, "step": 18678 }, { "epoch": 0.6689347682059914, "grad_norm": 1.6548495292663574, "learning_rate": 5.2181797526957764e-05, "loss": 1.4284, "step": 18679 }, { "epoch": 0.6689705803355597, "grad_norm": 1.82999849319458, "learning_rate": 5.2171610890805524e-05, "loss": 1.2064, "step": 18680 }, { "epoch": 0.6690063924651279, "grad_norm": 1.543445348739624, "learning_rate": 5.216142489812256e-05, "loss": 1.4014, "step": 18681 }, { "epoch": 0.6690422045946962, "grad_norm": 2.067296028137207, "learning_rate": 5.215123954904596e-05, "loss": 1.4993, "step": 18682 }, { "epoch": 0.6690780167242645, "grad_norm": 1.2235534191131592, "learning_rate": 5.2141054843712675e-05, "loss": 1.3685, "step": 18683 }, { "epoch": 0.6691138288538327, "grad_norm": 1.3292855024337769, "learning_rate": 5.213087078225975e-05, "loss": 1.5753, "step": 18684 }, { "epoch": 0.6691496409834011, "grad_norm": 1.5418928861618042, "learning_rate": 5.212068736482423e-05, "loss": 1.7125, "step": 18685 }, { "epoch": 0.6691854531129694, "grad_norm": 1.34342360496521, "learning_rate": 5.211050459154313e-05, "loss": 1.6613, "step": 18686 }, { "epoch": 0.6692212652425377, "grad_norm": 1.657388687133789, "learning_rate": 5.210032246255338e-05, "loss": 1.372, "step": 18687 }, { "epoch": 0.6692570773721059, "grad_norm": 1.9113507270812988, "learning_rate": 5.209014097799201e-05, "loss": 1.6323, "step": 18688 }, { "epoch": 0.6692928895016742, "grad_norm": 1.7546972036361694, "learning_rate": 5.207996013799603e-05, "loss": 1.4294, "step": 18689 }, { "epoch": 0.6693287016312425, "grad_norm": 2.2239842414855957, "learning_rate": 5.206977994270233e-05, "loss": 1.5034, "step": 18690 }, { "epoch": 0.6693645137608107, "grad_norm": 2.2625253200531006, "learning_rate": 5.205960039224795e-05, "loss": 1.7117, "step": 18691 }, { "epoch": 0.6694003258903791, "grad_norm": 2.014033079147339, "learning_rate": 5.2049421486769744e-05, "loss": 1.5873, "step": 18692 }, { "epoch": 0.6694361380199474, "grad_norm": 1.4991728067398071, "learning_rate": 5.203924322640479e-05, "loss": 1.2493, "step": 18693 }, { "epoch": 0.6694719501495157, "grad_norm": 1.4453134536743164, "learning_rate": 5.2029065611289926e-05, "loss": 1.5998, "step": 18694 }, { "epoch": 0.6695077622790839, "grad_norm": 1.5553264617919922, "learning_rate": 5.2018888641562126e-05, "loss": 1.2448, "step": 18695 }, { "epoch": 0.6695435744086522, "grad_norm": 1.590145230293274, "learning_rate": 5.200871231735822e-05, "loss": 1.3542, "step": 18696 }, { "epoch": 0.6695793865382205, "grad_norm": 1.796265959739685, "learning_rate": 5.1998536638815266e-05, "loss": 1.4915, "step": 18697 }, { "epoch": 0.6696151986677887, "grad_norm": 1.4939812421798706, "learning_rate": 5.198836160607008e-05, "loss": 1.4749, "step": 18698 }, { "epoch": 0.6696510107973571, "grad_norm": 1.3615853786468506, "learning_rate": 5.197818721925949e-05, "loss": 1.4405, "step": 18699 }, { "epoch": 0.6696868229269254, "grad_norm": 1.585461974143982, "learning_rate": 5.196801347852051e-05, "loss": 1.3092, "step": 18700 }, { "epoch": 0.6697226350564937, "grad_norm": 1.6997668743133545, "learning_rate": 5.195784038398992e-05, "loss": 1.5083, "step": 18701 }, { "epoch": 0.6697584471860619, "grad_norm": 1.4121911525726318, "learning_rate": 5.194766793580466e-05, "loss": 1.1211, "step": 18702 }, { "epoch": 0.6697942593156302, "grad_norm": 1.5166962146759033, "learning_rate": 5.193749613410146e-05, "loss": 1.2174, "step": 18703 }, { "epoch": 0.6698300714451985, "grad_norm": 1.7505559921264648, "learning_rate": 5.1927324979017335e-05, "loss": 1.4222, "step": 18704 }, { "epoch": 0.6698658835747667, "grad_norm": 1.718768835067749, "learning_rate": 5.191715447068901e-05, "loss": 1.2597, "step": 18705 }, { "epoch": 0.6699016957043351, "grad_norm": 1.9997526407241821, "learning_rate": 5.190698460925338e-05, "loss": 1.5697, "step": 18706 }, { "epoch": 0.6699375078339034, "grad_norm": 1.2355728149414062, "learning_rate": 5.1896815394847195e-05, "loss": 1.5054, "step": 18707 }, { "epoch": 0.6699733199634716, "grad_norm": 1.4850220680236816, "learning_rate": 5.188664682760731e-05, "loss": 1.6727, "step": 18708 }, { "epoch": 0.6700091320930399, "grad_norm": 1.334753155708313, "learning_rate": 5.1876478907670576e-05, "loss": 1.4472, "step": 18709 }, { "epoch": 0.6700449442226082, "grad_norm": 1.5342482328414917, "learning_rate": 5.186631163517367e-05, "loss": 1.2759, "step": 18710 }, { "epoch": 0.6700807563521765, "grad_norm": 1.6298881769180298, "learning_rate": 5.185614501025353e-05, "loss": 1.4384, "step": 18711 }, { "epoch": 0.6701165684817447, "grad_norm": 1.4282634258270264, "learning_rate": 5.184597903304681e-05, "loss": 1.4343, "step": 18712 }, { "epoch": 0.6701523806113131, "grad_norm": 1.7089579105377197, "learning_rate": 5.183581370369037e-05, "loss": 1.67, "step": 18713 }, { "epoch": 0.6701881927408814, "grad_norm": 1.507480502128601, "learning_rate": 5.182564902232086e-05, "loss": 1.4983, "step": 18714 }, { "epoch": 0.6702240048704496, "grad_norm": 1.9724632501602173, "learning_rate": 5.1815484989075157e-05, "loss": 1.4372, "step": 18715 }, { "epoch": 0.6702598170000179, "grad_norm": 1.533915400505066, "learning_rate": 5.1805321604089974e-05, "loss": 1.4643, "step": 18716 }, { "epoch": 0.6702956291295862, "grad_norm": 1.8036576509475708, "learning_rate": 5.1795158867501966e-05, "loss": 1.322, "step": 18717 }, { "epoch": 0.6703314412591544, "grad_norm": 1.8307304382324219, "learning_rate": 5.1784996779447926e-05, "loss": 1.4068, "step": 18718 }, { "epoch": 0.6703672533887227, "grad_norm": 1.794142484664917, "learning_rate": 5.177483534006455e-05, "loss": 1.6021, "step": 18719 }, { "epoch": 0.6704030655182911, "grad_norm": 1.9921245574951172, "learning_rate": 5.1764674549488614e-05, "loss": 1.0792, "step": 18720 }, { "epoch": 0.6704388776478594, "grad_norm": 1.671963095664978, "learning_rate": 5.175451440785671e-05, "loss": 1.7145, "step": 18721 }, { "epoch": 0.6704746897774276, "grad_norm": 2.1620876789093018, "learning_rate": 5.174435491530559e-05, "loss": 1.6685, "step": 18722 }, { "epoch": 0.6705105019069959, "grad_norm": 1.408326268196106, "learning_rate": 5.173419607197193e-05, "loss": 1.1387, "step": 18723 }, { "epoch": 0.6705463140365642, "grad_norm": 1.5527623891830444, "learning_rate": 5.172403787799245e-05, "loss": 1.1697, "step": 18724 }, { "epoch": 0.6705821261661324, "grad_norm": 1.6615278720855713, "learning_rate": 5.1713880333503704e-05, "loss": 1.6103, "step": 18725 }, { "epoch": 0.6706179382957007, "grad_norm": 2.0983941555023193, "learning_rate": 5.1703723438642436e-05, "loss": 1.6692, "step": 18726 }, { "epoch": 0.6706537504252691, "grad_norm": 1.7213473320007324, "learning_rate": 5.16935671935453e-05, "loss": 1.4165, "step": 18727 }, { "epoch": 0.6706895625548374, "grad_norm": 2.2718751430511475, "learning_rate": 5.1683411598348876e-05, "loss": 1.4563, "step": 18728 }, { "epoch": 0.6707253746844056, "grad_norm": 1.7657798528671265, "learning_rate": 5.167325665318983e-05, "loss": 1.6676, "step": 18729 }, { "epoch": 0.6707611868139739, "grad_norm": 1.8499521017074585, "learning_rate": 5.1663102358204754e-05, "loss": 1.5885, "step": 18730 }, { "epoch": 0.6707969989435422, "grad_norm": 1.6801620721817017, "learning_rate": 5.165294871353035e-05, "loss": 1.2257, "step": 18731 }, { "epoch": 0.6708328110731104, "grad_norm": 1.3649678230285645, "learning_rate": 5.16427957193031e-05, "loss": 1.4112, "step": 18732 }, { "epoch": 0.6708686232026787, "grad_norm": 1.5700057744979858, "learning_rate": 5.163264337565967e-05, "loss": 1.5946, "step": 18733 }, { "epoch": 0.6709044353322471, "grad_norm": 1.8407195806503296, "learning_rate": 5.1622491682736675e-05, "loss": 1.2455, "step": 18734 }, { "epoch": 0.6709402474618154, "grad_norm": 2.2659173011779785, "learning_rate": 5.16123406406706e-05, "loss": 1.8748, "step": 18735 }, { "epoch": 0.6709760595913836, "grad_norm": 2.0498504638671875, "learning_rate": 5.160219024959807e-05, "loss": 1.9133, "step": 18736 }, { "epoch": 0.6710118717209519, "grad_norm": 1.556580662727356, "learning_rate": 5.159204050965565e-05, "loss": 1.183, "step": 18737 }, { "epoch": 0.6710476838505202, "grad_norm": 1.2161188125610352, "learning_rate": 5.158189142097991e-05, "loss": 1.2662, "step": 18738 }, { "epoch": 0.6710834959800884, "grad_norm": 1.29056715965271, "learning_rate": 5.157174298370734e-05, "loss": 1.2986, "step": 18739 }, { "epoch": 0.6711193081096567, "grad_norm": 2.0055019855499268, "learning_rate": 5.15615951979745e-05, "loss": 1.2297, "step": 18740 }, { "epoch": 0.6711551202392251, "grad_norm": 1.7979457378387451, "learning_rate": 5.155144806391789e-05, "loss": 1.413, "step": 18741 }, { "epoch": 0.6711909323687933, "grad_norm": 1.5609806776046753, "learning_rate": 5.154130158167412e-05, "loss": 1.4996, "step": 18742 }, { "epoch": 0.6712267444983616, "grad_norm": 1.425581455230713, "learning_rate": 5.153115575137959e-05, "loss": 1.2355, "step": 18743 }, { "epoch": 0.6712625566279299, "grad_norm": 2.3990538120269775, "learning_rate": 5.152101057317082e-05, "loss": 1.3091, "step": 18744 }, { "epoch": 0.6712983687574982, "grad_norm": 1.8333427906036377, "learning_rate": 5.151086604718438e-05, "loss": 1.1728, "step": 18745 }, { "epoch": 0.6713341808870664, "grad_norm": 1.5617541074752808, "learning_rate": 5.150072217355664e-05, "loss": 1.1859, "step": 18746 }, { "epoch": 0.6713699930166347, "grad_norm": 1.6639366149902344, "learning_rate": 5.149057895242412e-05, "loss": 1.5773, "step": 18747 }, { "epoch": 0.6714058051462031, "grad_norm": 1.8866615295410156, "learning_rate": 5.148043638392329e-05, "loss": 1.2715, "step": 18748 }, { "epoch": 0.6714416172757713, "grad_norm": 1.8568611145019531, "learning_rate": 5.147029446819065e-05, "loss": 1.4749, "step": 18749 }, { "epoch": 0.6714774294053396, "grad_norm": 2.128819704055786, "learning_rate": 5.146015320536255e-05, "loss": 1.2029, "step": 18750 }, { "epoch": 0.6715132415349079, "grad_norm": 1.3638184070587158, "learning_rate": 5.145001259557548e-05, "loss": 1.3943, "step": 18751 }, { "epoch": 0.6715490536644761, "grad_norm": 1.7398433685302734, "learning_rate": 5.14398726389659e-05, "loss": 1.082, "step": 18752 }, { "epoch": 0.6715848657940444, "grad_norm": 2.225938320159912, "learning_rate": 5.142973333567016e-05, "loss": 1.4355, "step": 18753 }, { "epoch": 0.6716206779236127, "grad_norm": 2.0473129749298096, "learning_rate": 5.141959468582471e-05, "loss": 1.4406, "step": 18754 }, { "epoch": 0.6716564900531811, "grad_norm": 1.4689384698867798, "learning_rate": 5.140945668956595e-05, "loss": 1.4992, "step": 18755 }, { "epoch": 0.6716923021827493, "grad_norm": 1.6381616592407227, "learning_rate": 5.1399319347030306e-05, "loss": 1.6428, "step": 18756 }, { "epoch": 0.6717281143123176, "grad_norm": 1.9008187055587769, "learning_rate": 5.1389182658354105e-05, "loss": 1.8243, "step": 18757 }, { "epoch": 0.6717639264418859, "grad_norm": 3.256089448928833, "learning_rate": 5.137904662367373e-05, "loss": 1.5297, "step": 18758 }, { "epoch": 0.6717997385714541, "grad_norm": 1.397004246711731, "learning_rate": 5.136891124312557e-05, "loss": 1.2896, "step": 18759 }, { "epoch": 0.6718355507010224, "grad_norm": 1.8906328678131104, "learning_rate": 5.135877651684603e-05, "loss": 1.5225, "step": 18760 }, { "epoch": 0.6718713628305907, "grad_norm": 1.9930126667022705, "learning_rate": 5.1348642444971364e-05, "loss": 1.4106, "step": 18761 }, { "epoch": 0.6719071749601591, "grad_norm": 1.5230907201766968, "learning_rate": 5.133850902763795e-05, "loss": 1.4992, "step": 18762 }, { "epoch": 0.6719429870897273, "grad_norm": 1.6010066270828247, "learning_rate": 5.132837626498217e-05, "loss": 1.4002, "step": 18763 }, { "epoch": 0.6719787992192956, "grad_norm": 1.5590764284133911, "learning_rate": 5.1318244157140285e-05, "loss": 1.2998, "step": 18764 }, { "epoch": 0.6720146113488639, "grad_norm": 1.6329294443130493, "learning_rate": 5.13081127042486e-05, "loss": 1.1118, "step": 18765 }, { "epoch": 0.6720504234784321, "grad_norm": 1.5088551044464111, "learning_rate": 5.129798190644348e-05, "loss": 1.2605, "step": 18766 }, { "epoch": 0.6720862356080004, "grad_norm": 1.7145344018936157, "learning_rate": 5.128785176386122e-05, "loss": 1.3524, "step": 18767 }, { "epoch": 0.6721220477375687, "grad_norm": 1.9711799621582031, "learning_rate": 5.127772227663803e-05, "loss": 1.4702, "step": 18768 }, { "epoch": 0.672157859867137, "grad_norm": 2.004667043685913, "learning_rate": 5.1267593444910254e-05, "loss": 1.5904, "step": 18769 }, { "epoch": 0.6721936719967053, "grad_norm": 2.5094735622406006, "learning_rate": 5.125746526881417e-05, "loss": 1.4116, "step": 18770 }, { "epoch": 0.6722294841262736, "grad_norm": 1.3970831632614136, "learning_rate": 5.1247337748486005e-05, "loss": 1.3415, "step": 18771 }, { "epoch": 0.6722652962558419, "grad_norm": 1.901026725769043, "learning_rate": 5.1237210884061994e-05, "loss": 1.382, "step": 18772 }, { "epoch": 0.6723011083854101, "grad_norm": 1.8950276374816895, "learning_rate": 5.1227084675678425e-05, "loss": 1.2152, "step": 18773 }, { "epoch": 0.6723369205149784, "grad_norm": 1.730116844177246, "learning_rate": 5.121695912347156e-05, "loss": 1.4224, "step": 18774 }, { "epoch": 0.6723727326445467, "grad_norm": 1.482043743133545, "learning_rate": 5.120683422757755e-05, "loss": 1.526, "step": 18775 }, { "epoch": 0.672408544774115, "grad_norm": 1.451438069343567, "learning_rate": 5.119670998813264e-05, "loss": 1.4829, "step": 18776 }, { "epoch": 0.6724443569036833, "grad_norm": 2.14319109916687, "learning_rate": 5.1186586405273055e-05, "loss": 1.3499, "step": 18777 }, { "epoch": 0.6724801690332516, "grad_norm": 2.263322353363037, "learning_rate": 5.117646347913501e-05, "loss": 1.144, "step": 18778 }, { "epoch": 0.6725159811628199, "grad_norm": 1.8152785301208496, "learning_rate": 5.116634120985467e-05, "loss": 1.3347, "step": 18779 }, { "epoch": 0.6725517932923881, "grad_norm": 1.4670072793960571, "learning_rate": 5.115621959756815e-05, "loss": 1.3096, "step": 18780 }, { "epoch": 0.6725876054219564, "grad_norm": 1.4545962810516357, "learning_rate": 5.1146098642411765e-05, "loss": 1.4713, "step": 18781 }, { "epoch": 0.6726234175515247, "grad_norm": 1.4335076808929443, "learning_rate": 5.113597834452157e-05, "loss": 1.323, "step": 18782 }, { "epoch": 0.672659229681093, "grad_norm": 1.4578428268432617, "learning_rate": 5.1125858704033745e-05, "loss": 1.6607, "step": 18783 }, { "epoch": 0.6726950418106613, "grad_norm": 1.8013838529586792, "learning_rate": 5.111573972108446e-05, "loss": 1.7419, "step": 18784 }, { "epoch": 0.6727308539402296, "grad_norm": 1.3669114112854004, "learning_rate": 5.1105621395809875e-05, "loss": 1.3873, "step": 18785 }, { "epoch": 0.6727666660697978, "grad_norm": 1.4641414880752563, "learning_rate": 5.1095503728346095e-05, "loss": 1.4673, "step": 18786 }, { "epoch": 0.6728024781993661, "grad_norm": 2.017226219177246, "learning_rate": 5.108538671882914e-05, "loss": 1.6612, "step": 18787 }, { "epoch": 0.6728382903289344, "grad_norm": 1.617396593093872, "learning_rate": 5.10752703673953e-05, "loss": 1.5628, "step": 18788 }, { "epoch": 0.6728741024585027, "grad_norm": 1.8963992595672607, "learning_rate": 5.106515467418054e-05, "loss": 1.2682, "step": 18789 }, { "epoch": 0.672909914588071, "grad_norm": 2.118973731994629, "learning_rate": 5.1055039639321046e-05, "loss": 1.3527, "step": 18790 }, { "epoch": 0.6729457267176393, "grad_norm": 1.6391757726669312, "learning_rate": 5.104492526295278e-05, "loss": 1.6404, "step": 18791 }, { "epoch": 0.6729815388472076, "grad_norm": 1.6890900135040283, "learning_rate": 5.103481154521197e-05, "loss": 1.1516, "step": 18792 }, { "epoch": 0.6730173509767758, "grad_norm": 2.0314505100250244, "learning_rate": 5.102469848623459e-05, "loss": 1.0655, "step": 18793 }, { "epoch": 0.6730531631063441, "grad_norm": 1.655391812324524, "learning_rate": 5.10145860861567e-05, "loss": 1.3118, "step": 18794 }, { "epoch": 0.6730889752359124, "grad_norm": 1.847886085510254, "learning_rate": 5.1004474345114404e-05, "loss": 1.6991, "step": 18795 }, { "epoch": 0.6731247873654806, "grad_norm": 1.691873550415039, "learning_rate": 5.099436326324367e-05, "loss": 1.4669, "step": 18796 }, { "epoch": 0.673160599495049, "grad_norm": 1.4754281044006348, "learning_rate": 5.098425284068062e-05, "loss": 1.2964, "step": 18797 }, { "epoch": 0.6731964116246173, "grad_norm": 1.3968569040298462, "learning_rate": 5.0974143077561135e-05, "loss": 1.4885, "step": 18798 }, { "epoch": 0.6732322237541856, "grad_norm": 1.5505691766738892, "learning_rate": 5.0964033974021386e-05, "loss": 1.4887, "step": 18799 }, { "epoch": 0.6732680358837538, "grad_norm": 1.2982192039489746, "learning_rate": 5.095392553019728e-05, "loss": 1.4633, "step": 18800 }, { "epoch": 0.6733038480133221, "grad_norm": 1.4484449625015259, "learning_rate": 5.094381774622488e-05, "loss": 1.5766, "step": 18801 }, { "epoch": 0.6733396601428904, "grad_norm": 2.2219207286834717, "learning_rate": 5.0933710622240036e-05, "loss": 1.6196, "step": 18802 }, { "epoch": 0.6733754722724586, "grad_norm": 1.9814118146896362, "learning_rate": 5.0923604158378924e-05, "loss": 1.5831, "step": 18803 }, { "epoch": 0.673411284402027, "grad_norm": 2.133800983428955, "learning_rate": 5.091349835477741e-05, "loss": 1.3814, "step": 18804 }, { "epoch": 0.6734470965315953, "grad_norm": 1.7097578048706055, "learning_rate": 5.0903393211571414e-05, "loss": 1.5687, "step": 18805 }, { "epoch": 0.6734829086611636, "grad_norm": 1.711245059967041, "learning_rate": 5.089328872889694e-05, "loss": 1.2619, "step": 18806 }, { "epoch": 0.6735187207907318, "grad_norm": 1.5886105298995972, "learning_rate": 5.0883184906889924e-05, "loss": 1.3635, "step": 18807 }, { "epoch": 0.6735545329203001, "grad_norm": 1.8277711868286133, "learning_rate": 5.087308174568632e-05, "loss": 1.7714, "step": 18808 }, { "epoch": 0.6735903450498684, "grad_norm": 1.5488297939300537, "learning_rate": 5.086297924542198e-05, "loss": 1.2493, "step": 18809 }, { "epoch": 0.6736261571794366, "grad_norm": 1.1911660432815552, "learning_rate": 5.085287740623292e-05, "loss": 1.4918, "step": 18810 }, { "epoch": 0.673661969309005, "grad_norm": 2.2458438873291016, "learning_rate": 5.0842776228255e-05, "loss": 1.3248, "step": 18811 }, { "epoch": 0.6736977814385733, "grad_norm": 1.5594806671142578, "learning_rate": 5.083267571162412e-05, "loss": 1.5582, "step": 18812 }, { "epoch": 0.6737335935681416, "grad_norm": 2.4863622188568115, "learning_rate": 5.082257585647614e-05, "loss": 1.5133, "step": 18813 }, { "epoch": 0.6737694056977098, "grad_norm": 2.48881196975708, "learning_rate": 5.0812476662946975e-05, "loss": 1.3237, "step": 18814 }, { "epoch": 0.6738052178272781, "grad_norm": 1.7107995748519897, "learning_rate": 5.0802378131172525e-05, "loss": 1.8406, "step": 18815 }, { "epoch": 0.6738410299568464, "grad_norm": 1.973778247833252, "learning_rate": 5.079228026128857e-05, "loss": 1.2656, "step": 18816 }, { "epoch": 0.6738768420864146, "grad_norm": 1.4093636274337769, "learning_rate": 5.078218305343102e-05, "loss": 1.5239, "step": 18817 }, { "epoch": 0.6739126542159829, "grad_norm": 2.64251446723938, "learning_rate": 5.07720865077357e-05, "loss": 1.4003, "step": 18818 }, { "epoch": 0.6739484663455513, "grad_norm": 1.3757315874099731, "learning_rate": 5.0761990624338504e-05, "loss": 1.4934, "step": 18819 }, { "epoch": 0.6739842784751195, "grad_norm": 1.2844980955123901, "learning_rate": 5.075189540337514e-05, "loss": 1.5985, "step": 18820 }, { "epoch": 0.6740200906046878, "grad_norm": 2.0259010791778564, "learning_rate": 5.074180084498157e-05, "loss": 1.5542, "step": 18821 }, { "epoch": 0.6740559027342561, "grad_norm": 1.7492506504058838, "learning_rate": 5.0731706949293525e-05, "loss": 1.4885, "step": 18822 }, { "epoch": 0.6740917148638244, "grad_norm": 1.6474558115005493, "learning_rate": 5.072161371644677e-05, "loss": 1.5, "step": 18823 }, { "epoch": 0.6741275269933926, "grad_norm": 1.8024877309799194, "learning_rate": 5.0711521146577156e-05, "loss": 1.3702, "step": 18824 }, { "epoch": 0.6741633391229609, "grad_norm": 1.5845979452133179, "learning_rate": 5.070142923982043e-05, "loss": 1.2484, "step": 18825 }, { "epoch": 0.6741991512525293, "grad_norm": 1.974819540977478, "learning_rate": 5.069133799631243e-05, "loss": 1.4982, "step": 18826 }, { "epoch": 0.6742349633820975, "grad_norm": 2.497584819793701, "learning_rate": 5.0681247416188826e-05, "loss": 1.5294, "step": 18827 }, { "epoch": 0.6742707755116658, "grad_norm": 1.8491204977035522, "learning_rate": 5.067115749958543e-05, "loss": 1.4184, "step": 18828 }, { "epoch": 0.6743065876412341, "grad_norm": 1.4940153360366821, "learning_rate": 5.066106824663798e-05, "loss": 1.4861, "step": 18829 }, { "epoch": 0.6743423997708023, "grad_norm": 1.228395938873291, "learning_rate": 5.065097965748224e-05, "loss": 1.543, "step": 18830 }, { "epoch": 0.6743782119003706, "grad_norm": 1.5438652038574219, "learning_rate": 5.0640891732253905e-05, "loss": 1.3178, "step": 18831 }, { "epoch": 0.6744140240299389, "grad_norm": 1.4345051050186157, "learning_rate": 5.063080447108868e-05, "loss": 1.5065, "step": 18832 }, { "epoch": 0.6744498361595073, "grad_norm": 1.7704910039901733, "learning_rate": 5.0620717874122336e-05, "loss": 1.4677, "step": 18833 }, { "epoch": 0.6744856482890755, "grad_norm": 1.8506509065628052, "learning_rate": 5.06106319414905e-05, "loss": 1.8333, "step": 18834 }, { "epoch": 0.6745214604186438, "grad_norm": 1.587844967842102, "learning_rate": 5.0600546673328916e-05, "loss": 1.3863, "step": 18835 }, { "epoch": 0.6745572725482121, "grad_norm": 1.68564772605896, "learning_rate": 5.059046206977325e-05, "loss": 1.9733, "step": 18836 }, { "epoch": 0.6745930846777803, "grad_norm": 1.5647304058074951, "learning_rate": 5.0580378130959216e-05, "loss": 1.8071, "step": 18837 }, { "epoch": 0.6746288968073486, "grad_norm": 1.7359753847122192, "learning_rate": 5.05702948570224e-05, "loss": 1.1989, "step": 18838 }, { "epoch": 0.6746647089369169, "grad_norm": 2.1853439807891846, "learning_rate": 5.056021224809853e-05, "loss": 1.3432, "step": 18839 }, { "epoch": 0.6747005210664853, "grad_norm": 1.6081196069717407, "learning_rate": 5.055013030432326e-05, "loss": 1.1698, "step": 18840 }, { "epoch": 0.6747363331960535, "grad_norm": 1.6497262716293335, "learning_rate": 5.054004902583216e-05, "loss": 1.4807, "step": 18841 }, { "epoch": 0.6747721453256218, "grad_norm": 2.125016689300537, "learning_rate": 5.052996841276091e-05, "loss": 1.4742, "step": 18842 }, { "epoch": 0.6748079574551901, "grad_norm": 1.5704431533813477, "learning_rate": 5.0519888465245116e-05, "loss": 1.5663, "step": 18843 }, { "epoch": 0.6748437695847583, "grad_norm": 2.178602695465088, "learning_rate": 5.050980918342043e-05, "loss": 1.57, "step": 18844 }, { "epoch": 0.6748795817143266, "grad_norm": 1.4927809238433838, "learning_rate": 5.04997305674224e-05, "loss": 1.3771, "step": 18845 }, { "epoch": 0.6749153938438949, "grad_norm": 3.4965755939483643, "learning_rate": 5.048965261738664e-05, "loss": 1.6496, "step": 18846 }, { "epoch": 0.6749512059734633, "grad_norm": 1.5973447561264038, "learning_rate": 5.047957533344874e-05, "loss": 1.3466, "step": 18847 }, { "epoch": 0.6749870181030315, "grad_norm": 1.5947939157485962, "learning_rate": 5.0469498715744314e-05, "loss": 1.3644, "step": 18848 }, { "epoch": 0.6750228302325998, "grad_norm": 1.60691237449646, "learning_rate": 5.045942276440885e-05, "loss": 1.6925, "step": 18849 }, { "epoch": 0.6750586423621681, "grad_norm": 1.4345650672912598, "learning_rate": 5.0449347479577946e-05, "loss": 1.163, "step": 18850 }, { "epoch": 0.6750944544917363, "grad_norm": 2.119398593902588, "learning_rate": 5.043927286138721e-05, "loss": 1.5569, "step": 18851 }, { "epoch": 0.6751302666213046, "grad_norm": 1.8614598512649536, "learning_rate": 5.0429198909972086e-05, "loss": 1.5067, "step": 18852 }, { "epoch": 0.6751660787508729, "grad_norm": 2.991887331008911, "learning_rate": 5.041912562546813e-05, "loss": 1.673, "step": 18853 }, { "epoch": 0.6752018908804412, "grad_norm": 1.687841534614563, "learning_rate": 5.040905300801091e-05, "loss": 1.235, "step": 18854 }, { "epoch": 0.6752377030100095, "grad_norm": 2.048767566680908, "learning_rate": 5.039898105773594e-05, "loss": 1.5541, "step": 18855 }, { "epoch": 0.6752735151395778, "grad_norm": 1.4294754266738892, "learning_rate": 5.038890977477866e-05, "loss": 1.2713, "step": 18856 }, { "epoch": 0.675309327269146, "grad_norm": 2.4173076152801514, "learning_rate": 5.037883915927462e-05, "loss": 1.4883, "step": 18857 }, { "epoch": 0.6753451393987143, "grad_norm": 1.8582412004470825, "learning_rate": 5.036876921135931e-05, "loss": 1.2395, "step": 18858 }, { "epoch": 0.6753809515282826, "grad_norm": 1.710937738418579, "learning_rate": 5.035869993116816e-05, "loss": 1.5521, "step": 18859 }, { "epoch": 0.6754167636578509, "grad_norm": 1.5905033349990845, "learning_rate": 5.034863131883667e-05, "loss": 1.5004, "step": 18860 }, { "epoch": 0.6754525757874192, "grad_norm": 1.7289782762527466, "learning_rate": 5.03385633745003e-05, "loss": 1.4988, "step": 18861 }, { "epoch": 0.6754883879169875, "grad_norm": 1.712811827659607, "learning_rate": 5.032849609829454e-05, "loss": 1.6214, "step": 18862 }, { "epoch": 0.6755242000465558, "grad_norm": 2.12488055229187, "learning_rate": 5.0318429490354754e-05, "loss": 1.4074, "step": 18863 }, { "epoch": 0.675560012176124, "grad_norm": 1.458487629890442, "learning_rate": 5.030836355081643e-05, "loss": 1.3575, "step": 18864 }, { "epoch": 0.6755958243056923, "grad_norm": 2.2130470275878906, "learning_rate": 5.0298298279814956e-05, "loss": 1.617, "step": 18865 }, { "epoch": 0.6756316364352606, "grad_norm": 1.495819091796875, "learning_rate": 5.0288233677485806e-05, "loss": 1.4228, "step": 18866 }, { "epoch": 0.6756674485648289, "grad_norm": 1.9383759498596191, "learning_rate": 5.027816974396432e-05, "loss": 1.2309, "step": 18867 }, { "epoch": 0.6757032606943972, "grad_norm": 1.6474690437316895, "learning_rate": 5.0268106479385924e-05, "loss": 1.1464, "step": 18868 }, { "epoch": 0.6757390728239655, "grad_norm": 1.8009663820266724, "learning_rate": 5.025804388388604e-05, "loss": 1.7125, "step": 18869 }, { "epoch": 0.6757748849535338, "grad_norm": 1.71844482421875, "learning_rate": 5.024798195759998e-05, "loss": 1.6644, "step": 18870 }, { "epoch": 0.675810697083102, "grad_norm": 1.4656023979187012, "learning_rate": 5.023792070066313e-05, "loss": 1.552, "step": 18871 }, { "epoch": 0.6758465092126703, "grad_norm": 1.699833869934082, "learning_rate": 5.022786011321089e-05, "loss": 1.2069, "step": 18872 }, { "epoch": 0.6758823213422386, "grad_norm": 1.6764025688171387, "learning_rate": 5.021780019537862e-05, "loss": 1.5697, "step": 18873 }, { "epoch": 0.6759181334718068, "grad_norm": 1.3379871845245361, "learning_rate": 5.02077409473016e-05, "loss": 1.5602, "step": 18874 }, { "epoch": 0.6759539456013752, "grad_norm": 2.3208539485931396, "learning_rate": 5.019768236911519e-05, "loss": 1.5169, "step": 18875 }, { "epoch": 0.6759897577309435, "grad_norm": 1.4288512468338013, "learning_rate": 5.018762446095476e-05, "loss": 1.1977, "step": 18876 }, { "epoch": 0.6760255698605118, "grad_norm": 1.9357956647872925, "learning_rate": 5.017756722295557e-05, "loss": 1.1796, "step": 18877 }, { "epoch": 0.67606138199008, "grad_norm": 1.7917031049728394, "learning_rate": 5.016751065525292e-05, "loss": 1.3224, "step": 18878 }, { "epoch": 0.6760971941196483, "grad_norm": 1.693039059638977, "learning_rate": 5.015745475798215e-05, "loss": 1.3998, "step": 18879 }, { "epoch": 0.6761330062492166, "grad_norm": 1.3678598403930664, "learning_rate": 5.014739953127857e-05, "loss": 1.185, "step": 18880 }, { "epoch": 0.6761688183787848, "grad_norm": 1.3098324537277222, "learning_rate": 5.013734497527739e-05, "loss": 1.3323, "step": 18881 }, { "epoch": 0.6762046305083532, "grad_norm": 1.487381935119629, "learning_rate": 5.0127291090113917e-05, "loss": 1.3497, "step": 18882 }, { "epoch": 0.6762404426379215, "grad_norm": 2.2838425636291504, "learning_rate": 5.011723787592344e-05, "loss": 1.5023, "step": 18883 }, { "epoch": 0.6762762547674898, "grad_norm": 1.4968982934951782, "learning_rate": 5.0107185332841155e-05, "loss": 1.221, "step": 18884 }, { "epoch": 0.676312066897058, "grad_norm": 2.1041419506073, "learning_rate": 5.009713346100235e-05, "loss": 2.0279, "step": 18885 }, { "epoch": 0.6763478790266263, "grad_norm": 1.8831173181533813, "learning_rate": 5.008708226054219e-05, "loss": 1.6359, "step": 18886 }, { "epoch": 0.6763836911561946, "grad_norm": 1.8215208053588867, "learning_rate": 5.007703173159604e-05, "loss": 1.1922, "step": 18887 }, { "epoch": 0.6764195032857628, "grad_norm": 1.7861186265945435, "learning_rate": 5.0066981874298967e-05, "loss": 1.351, "step": 18888 }, { "epoch": 0.6764553154153312, "grad_norm": 1.681621789932251, "learning_rate": 5.0056932688786294e-05, "loss": 1.6124, "step": 18889 }, { "epoch": 0.6764911275448995, "grad_norm": 1.4928348064422607, "learning_rate": 5.00468841751931e-05, "loss": 1.384, "step": 18890 }, { "epoch": 0.6765269396744678, "grad_norm": 1.405524730682373, "learning_rate": 5.0036836333654715e-05, "loss": 1.1791, "step": 18891 }, { "epoch": 0.676562751804036, "grad_norm": 1.3856693506240845, "learning_rate": 5.0026789164306255e-05, "loss": 1.5605, "step": 18892 }, { "epoch": 0.6765985639336043, "grad_norm": 1.7855710983276367, "learning_rate": 5.00167426672828e-05, "loss": 1.2509, "step": 18893 }, { "epoch": 0.6766343760631726, "grad_norm": 1.4088751077651978, "learning_rate": 5.000669684271968e-05, "loss": 1.4611, "step": 18894 }, { "epoch": 0.6766701881927408, "grad_norm": 1.4906492233276367, "learning_rate": 4.999665169075193e-05, "loss": 1.2085, "step": 18895 }, { "epoch": 0.6767060003223092, "grad_norm": 1.3844395875930786, "learning_rate": 4.998660721151476e-05, "loss": 1.0361, "step": 18896 }, { "epoch": 0.6767418124518775, "grad_norm": 1.425974726676941, "learning_rate": 4.997656340514321e-05, "loss": 1.4404, "step": 18897 }, { "epoch": 0.6767776245814457, "grad_norm": 2.3073105812072754, "learning_rate": 4.996652027177255e-05, "loss": 1.4989, "step": 18898 }, { "epoch": 0.676813436711014, "grad_norm": 2.7531237602233887, "learning_rate": 4.995647781153778e-05, "loss": 1.6744, "step": 18899 }, { "epoch": 0.6768492488405823, "grad_norm": 2.012333869934082, "learning_rate": 4.99464360245741e-05, "loss": 1.572, "step": 18900 }, { "epoch": 0.6768850609701506, "grad_norm": 1.7777117490768433, "learning_rate": 4.9936394911016504e-05, "loss": 1.5683, "step": 18901 }, { "epoch": 0.6769208730997188, "grad_norm": 1.54745614528656, "learning_rate": 4.992635447100015e-05, "loss": 1.6978, "step": 18902 }, { "epoch": 0.6769566852292872, "grad_norm": 2.0074431896209717, "learning_rate": 4.9916314704660126e-05, "loss": 1.3513, "step": 18903 }, { "epoch": 0.6769924973588555, "grad_norm": 1.5793383121490479, "learning_rate": 4.9906275612131424e-05, "loss": 1.4361, "step": 18904 }, { "epoch": 0.6770283094884237, "grad_norm": 2.9052045345306396, "learning_rate": 4.9896237193549244e-05, "loss": 1.671, "step": 18905 }, { "epoch": 0.677064121617992, "grad_norm": 2.1424288749694824, "learning_rate": 4.988619944904852e-05, "loss": 1.5612, "step": 18906 }, { "epoch": 0.6770999337475603, "grad_norm": 1.384803295135498, "learning_rate": 4.987616237876438e-05, "loss": 1.2562, "step": 18907 }, { "epoch": 0.6771357458771285, "grad_norm": 1.6141088008880615, "learning_rate": 4.9866125982831745e-05, "loss": 1.4878, "step": 18908 }, { "epoch": 0.6771715580066968, "grad_norm": 2.246306896209717, "learning_rate": 4.9856090261385793e-05, "loss": 1.3069, "step": 18909 }, { "epoch": 0.6772073701362652, "grad_norm": 1.907278060913086, "learning_rate": 4.984605521456146e-05, "loss": 1.3377, "step": 18910 }, { "epoch": 0.6772431822658335, "grad_norm": 1.491541862487793, "learning_rate": 4.983602084249372e-05, "loss": 1.7067, "step": 18911 }, { "epoch": 0.6772789943954017, "grad_norm": 1.672043800354004, "learning_rate": 4.982598714531762e-05, "loss": 1.3417, "step": 18912 }, { "epoch": 0.67731480652497, "grad_norm": 1.6884132623672485, "learning_rate": 4.981595412316815e-05, "loss": 1.5376, "step": 18913 }, { "epoch": 0.6773506186545383, "grad_norm": 1.6035497188568115, "learning_rate": 4.980592177618031e-05, "loss": 1.2029, "step": 18914 }, { "epoch": 0.6773864307841065, "grad_norm": 1.87796151638031, "learning_rate": 4.979589010448902e-05, "loss": 1.2361, "step": 18915 }, { "epoch": 0.6774222429136748, "grad_norm": 1.6658316850662231, "learning_rate": 4.978585910822926e-05, "loss": 1.2257, "step": 18916 }, { "epoch": 0.6774580550432432, "grad_norm": 1.7389546632766724, "learning_rate": 4.977582878753599e-05, "loss": 1.6042, "step": 18917 }, { "epoch": 0.6774938671728115, "grad_norm": 1.2868759632110596, "learning_rate": 4.9765799142544215e-05, "loss": 1.479, "step": 18918 }, { "epoch": 0.6775296793023797, "grad_norm": 1.504171371459961, "learning_rate": 4.975577017338876e-05, "loss": 1.2906, "step": 18919 }, { "epoch": 0.677565491431948, "grad_norm": 1.5632758140563965, "learning_rate": 4.9745741880204613e-05, "loss": 1.4864, "step": 18920 }, { "epoch": 0.6776013035615163, "grad_norm": 1.4741456508636475, "learning_rate": 4.973571426312673e-05, "loss": 1.2293, "step": 18921 }, { "epoch": 0.6776371156910845, "grad_norm": 1.4424731731414795, "learning_rate": 4.9725687322289926e-05, "loss": 1.4578, "step": 18922 }, { "epoch": 0.6776729278206528, "grad_norm": 1.7455040216445923, "learning_rate": 4.971566105782916e-05, "loss": 1.6631, "step": 18923 }, { "epoch": 0.6777087399502212, "grad_norm": 2.1548445224761963, "learning_rate": 4.9705635469879306e-05, "loss": 1.4788, "step": 18924 }, { "epoch": 0.6777445520797895, "grad_norm": 1.7783231735229492, "learning_rate": 4.969561055857529e-05, "loss": 1.2761, "step": 18925 }, { "epoch": 0.6777803642093577, "grad_norm": 1.5366977453231812, "learning_rate": 4.9685586324051915e-05, "loss": 1.4405, "step": 18926 }, { "epoch": 0.677816176338926, "grad_norm": 1.5162105560302734, "learning_rate": 4.967556276644406e-05, "loss": 1.2265, "step": 18927 }, { "epoch": 0.6778519884684943, "grad_norm": 1.6410696506500244, "learning_rate": 4.966553988588665e-05, "loss": 1.3447, "step": 18928 }, { "epoch": 0.6778878005980625, "grad_norm": 2.0513529777526855, "learning_rate": 4.965551768251442e-05, "loss": 1.3223, "step": 18929 }, { "epoch": 0.6779236127276308, "grad_norm": 1.720237374305725, "learning_rate": 4.9645496156462266e-05, "loss": 1.5513, "step": 18930 }, { "epoch": 0.6779594248571992, "grad_norm": 1.4774359464645386, "learning_rate": 4.963547530786501e-05, "loss": 1.555, "step": 18931 }, { "epoch": 0.6779952369867674, "grad_norm": 1.7184780836105347, "learning_rate": 4.962545513685751e-05, "loss": 1.3769, "step": 18932 }, { "epoch": 0.6780310491163357, "grad_norm": 1.2298649549484253, "learning_rate": 4.961543564357449e-05, "loss": 1.2371, "step": 18933 }, { "epoch": 0.678066861245904, "grad_norm": 1.4467629194259644, "learning_rate": 4.9605416828150795e-05, "loss": 1.5372, "step": 18934 }, { "epoch": 0.6781026733754723, "grad_norm": 1.4205254316329956, "learning_rate": 4.959539869072121e-05, "loss": 1.6665, "step": 18935 }, { "epoch": 0.6781384855050405, "grad_norm": 1.92221999168396, "learning_rate": 4.958538123142056e-05, "loss": 1.1817, "step": 18936 }, { "epoch": 0.6781742976346088, "grad_norm": 1.6638027429580688, "learning_rate": 4.957536445038353e-05, "loss": 1.3898, "step": 18937 }, { "epoch": 0.6782101097641772, "grad_norm": 1.847952961921692, "learning_rate": 4.9565348347744934e-05, "loss": 1.5683, "step": 18938 }, { "epoch": 0.6782459218937454, "grad_norm": 1.6598225831985474, "learning_rate": 4.955533292363955e-05, "loss": 1.3324, "step": 18939 }, { "epoch": 0.6782817340233137, "grad_norm": 1.522696852684021, "learning_rate": 4.954531817820206e-05, "loss": 1.4984, "step": 18940 }, { "epoch": 0.678317546152882, "grad_norm": 1.516022801399231, "learning_rate": 4.953530411156724e-05, "loss": 1.3997, "step": 18941 }, { "epoch": 0.6783533582824502, "grad_norm": 1.4114199876785278, "learning_rate": 4.95252907238698e-05, "loss": 1.3475, "step": 18942 }, { "epoch": 0.6783891704120185, "grad_norm": 1.7230263948440552, "learning_rate": 4.95152780152445e-05, "loss": 1.5503, "step": 18943 }, { "epoch": 0.6784249825415868, "grad_norm": 1.6824709177017212, "learning_rate": 4.9505265985825976e-05, "loss": 1.6195, "step": 18944 }, { "epoch": 0.6784607946711552, "grad_norm": 1.916304588317871, "learning_rate": 4.9495254635748975e-05, "loss": 1.4234, "step": 18945 }, { "epoch": 0.6784966068007234, "grad_norm": 1.438887596130371, "learning_rate": 4.948524396514821e-05, "loss": 1.351, "step": 18946 }, { "epoch": 0.6785324189302917, "grad_norm": 1.4166353940963745, "learning_rate": 4.947523397415829e-05, "loss": 1.6459, "step": 18947 }, { "epoch": 0.67856823105986, "grad_norm": 1.5291413068771362, "learning_rate": 4.9465224662913925e-05, "loss": 1.2955, "step": 18948 }, { "epoch": 0.6786040431894282, "grad_norm": 1.7831827402114868, "learning_rate": 4.9455216031549766e-05, "loss": 1.5585, "step": 18949 }, { "epoch": 0.6786398553189965, "grad_norm": 1.5598150491714478, "learning_rate": 4.9445208080200536e-05, "loss": 1.3291, "step": 18950 }, { "epoch": 0.6786756674485648, "grad_norm": 1.9024678468704224, "learning_rate": 4.943520080900076e-05, "loss": 1.536, "step": 18951 }, { "epoch": 0.6787114795781332, "grad_norm": 1.5680241584777832, "learning_rate": 4.9425194218085145e-05, "loss": 1.4492, "step": 18952 }, { "epoch": 0.6787472917077014, "grad_norm": 1.4497802257537842, "learning_rate": 4.94151883075883e-05, "loss": 1.5935, "step": 18953 }, { "epoch": 0.6787831038372697, "grad_norm": 1.754224419593811, "learning_rate": 4.940518307764489e-05, "loss": 1.3861, "step": 18954 }, { "epoch": 0.678818915966838, "grad_norm": 1.9332152605056763, "learning_rate": 4.939517852838944e-05, "loss": 1.2178, "step": 18955 }, { "epoch": 0.6788547280964062, "grad_norm": 1.623934268951416, "learning_rate": 4.938517465995659e-05, "loss": 1.5194, "step": 18956 }, { "epoch": 0.6788905402259745, "grad_norm": 1.711578369140625, "learning_rate": 4.937517147248096e-05, "loss": 1.8629, "step": 18957 }, { "epoch": 0.6789263523555428, "grad_norm": 2.0465073585510254, "learning_rate": 4.936516896609707e-05, "loss": 1.6172, "step": 18958 }, { "epoch": 0.6789621644851112, "grad_norm": 2.6911144256591797, "learning_rate": 4.9355167140939494e-05, "loss": 1.484, "step": 18959 }, { "epoch": 0.6789979766146794, "grad_norm": 2.0437419414520264, "learning_rate": 4.934516599714284e-05, "loss": 1.3593, "step": 18960 }, { "epoch": 0.6790337887442477, "grad_norm": 1.5595124959945679, "learning_rate": 4.933516553484167e-05, "loss": 1.9304, "step": 18961 }, { "epoch": 0.679069600873816, "grad_norm": 1.714911699295044, "learning_rate": 4.9325165754170446e-05, "loss": 1.1749, "step": 18962 }, { "epoch": 0.6791054130033842, "grad_norm": 1.7814215421676636, "learning_rate": 4.931516665526376e-05, "loss": 1.5922, "step": 18963 }, { "epoch": 0.6791412251329525, "grad_norm": 1.5183871984481812, "learning_rate": 4.930516823825616e-05, "loss": 1.4136, "step": 18964 }, { "epoch": 0.6791770372625208, "grad_norm": 1.6205239295959473, "learning_rate": 4.9295170503282095e-05, "loss": 1.5224, "step": 18965 }, { "epoch": 0.6792128493920891, "grad_norm": 1.9067203998565674, "learning_rate": 4.928517345047611e-05, "loss": 1.3327, "step": 18966 }, { "epoch": 0.6792486615216574, "grad_norm": 1.7289701700210571, "learning_rate": 4.927517707997269e-05, "loss": 1.3748, "step": 18967 }, { "epoch": 0.6792844736512257, "grad_norm": 1.5952321290969849, "learning_rate": 4.926518139190638e-05, "loss": 1.404, "step": 18968 }, { "epoch": 0.679320285780794, "grad_norm": 1.9176799058914185, "learning_rate": 4.925518638641157e-05, "loss": 1.9199, "step": 18969 }, { "epoch": 0.6793560979103622, "grad_norm": 1.8414347171783447, "learning_rate": 4.924519206362276e-05, "loss": 1.3767, "step": 18970 }, { "epoch": 0.6793919100399305, "grad_norm": 1.6279493570327759, "learning_rate": 4.9235198423674435e-05, "loss": 1.5276, "step": 18971 }, { "epoch": 0.6794277221694988, "grad_norm": 1.5713403224945068, "learning_rate": 4.9225205466701064e-05, "loss": 1.5543, "step": 18972 }, { "epoch": 0.6794635342990671, "grad_norm": 1.7283962965011597, "learning_rate": 4.9215213192837064e-05, "loss": 1.6197, "step": 18973 }, { "epoch": 0.6794993464286354, "grad_norm": 1.273970365524292, "learning_rate": 4.920522160221679e-05, "loss": 1.3018, "step": 18974 }, { "epoch": 0.6795351585582037, "grad_norm": 1.8756721019744873, "learning_rate": 4.91952306949748e-05, "loss": 1.1821, "step": 18975 }, { "epoch": 0.679570970687772, "grad_norm": 1.870096206665039, "learning_rate": 4.918524047124543e-05, "loss": 1.6877, "step": 18976 }, { "epoch": 0.6796067828173402, "grad_norm": 1.3298908472061157, "learning_rate": 4.9175250931163085e-05, "loss": 1.5494, "step": 18977 }, { "epoch": 0.6796425949469085, "grad_norm": 1.4009315967559814, "learning_rate": 4.916526207486219e-05, "loss": 1.4666, "step": 18978 }, { "epoch": 0.6796784070764768, "grad_norm": 1.6784093379974365, "learning_rate": 4.915527390247716e-05, "loss": 1.684, "step": 18979 }, { "epoch": 0.6797142192060451, "grad_norm": 4.324677467346191, "learning_rate": 4.914528641414233e-05, "loss": 1.8545, "step": 18980 }, { "epoch": 0.6797500313356134, "grad_norm": 1.4996967315673828, "learning_rate": 4.9135299609992004e-05, "loss": 1.3009, "step": 18981 }, { "epoch": 0.6797858434651817, "grad_norm": 1.5409586429595947, "learning_rate": 4.912531349016067e-05, "loss": 1.4616, "step": 18982 }, { "epoch": 0.6798216555947499, "grad_norm": 1.7398431301116943, "learning_rate": 4.911532805478259e-05, "loss": 1.2676, "step": 18983 }, { "epoch": 0.6798574677243182, "grad_norm": 1.7865056991577148, "learning_rate": 4.910534330399219e-05, "loss": 1.4283, "step": 18984 }, { "epoch": 0.6798932798538865, "grad_norm": 1.7800086736679077, "learning_rate": 4.909535923792365e-05, "loss": 1.3978, "step": 18985 }, { "epoch": 0.6799290919834547, "grad_norm": 1.393025517463684, "learning_rate": 4.9085375856711465e-05, "loss": 1.5825, "step": 18986 }, { "epoch": 0.6799649041130231, "grad_norm": 1.8537535667419434, "learning_rate": 4.907539316048985e-05, "loss": 1.4, "step": 18987 }, { "epoch": 0.6800007162425914, "grad_norm": 2.8984549045562744, "learning_rate": 4.906541114939313e-05, "loss": 1.4936, "step": 18988 }, { "epoch": 0.6800365283721597, "grad_norm": 2.0612361431121826, "learning_rate": 4.9055429823555624e-05, "loss": 1.5854, "step": 18989 }, { "epoch": 0.6800723405017279, "grad_norm": 1.7456159591674805, "learning_rate": 4.9045449183111566e-05, "loss": 1.1949, "step": 18990 }, { "epoch": 0.6801081526312962, "grad_norm": 1.8124316930770874, "learning_rate": 4.903546922819531e-05, "loss": 1.5129, "step": 18991 }, { "epoch": 0.6801439647608645, "grad_norm": 1.6117404699325562, "learning_rate": 4.9025489958940985e-05, "loss": 1.5364, "step": 18992 }, { "epoch": 0.6801797768904327, "grad_norm": 1.6479026079177856, "learning_rate": 4.9015511375483026e-05, "loss": 1.2438, "step": 18993 }, { "epoch": 0.6802155890200011, "grad_norm": 1.5882446765899658, "learning_rate": 4.900553347795556e-05, "loss": 1.2344, "step": 18994 }, { "epoch": 0.6802514011495694, "grad_norm": 1.787760615348816, "learning_rate": 4.899555626649289e-05, "loss": 1.066, "step": 18995 }, { "epoch": 0.6802872132791377, "grad_norm": 2.0816078186035156, "learning_rate": 4.898557974122915e-05, "loss": 1.528, "step": 18996 }, { "epoch": 0.6803230254087059, "grad_norm": 2.4905543327331543, "learning_rate": 4.8975603902298704e-05, "loss": 1.3881, "step": 18997 }, { "epoch": 0.6803588375382742, "grad_norm": 1.876359224319458, "learning_rate": 4.896562874983569e-05, "loss": 1.1194, "step": 18998 }, { "epoch": 0.6803946496678425, "grad_norm": 1.7218693494796753, "learning_rate": 4.8955654283974284e-05, "loss": 1.4642, "step": 18999 }, { "epoch": 0.6804304617974107, "grad_norm": 1.4344909191131592, "learning_rate": 4.89456805048487e-05, "loss": 1.5587, "step": 19000 }, { "epoch": 0.6804662739269791, "grad_norm": 2.262502670288086, "learning_rate": 4.893570741259312e-05, "loss": 1.7689, "step": 19001 }, { "epoch": 0.6805020860565474, "grad_norm": 1.9338393211364746, "learning_rate": 4.892573500734179e-05, "loss": 1.4208, "step": 19002 }, { "epoch": 0.6805378981861157, "grad_norm": 1.5523253679275513, "learning_rate": 4.891576328922872e-05, "loss": 1.3446, "step": 19003 }, { "epoch": 0.6805737103156839, "grad_norm": 2.149492025375366, "learning_rate": 4.890579225838824e-05, "loss": 1.575, "step": 19004 }, { "epoch": 0.6806095224452522, "grad_norm": 1.690125823020935, "learning_rate": 4.8895821914954376e-05, "loss": 1.3665, "step": 19005 }, { "epoch": 0.6806453345748205, "grad_norm": 1.6124178171157837, "learning_rate": 4.888585225906136e-05, "loss": 1.3594, "step": 19006 }, { "epoch": 0.6806811467043887, "grad_norm": 2.0891807079315186, "learning_rate": 4.8875883290843214e-05, "loss": 1.3593, "step": 19007 }, { "epoch": 0.6807169588339571, "grad_norm": 2.1637518405914307, "learning_rate": 4.886591501043413e-05, "loss": 1.6377, "step": 19008 }, { "epoch": 0.6807527709635254, "grad_norm": 1.429513692855835, "learning_rate": 4.885594741796823e-05, "loss": 1.3786, "step": 19009 }, { "epoch": 0.6807885830930936, "grad_norm": 1.508383870124817, "learning_rate": 4.884598051357955e-05, "loss": 1.5587, "step": 19010 }, { "epoch": 0.6808243952226619, "grad_norm": 1.646700143814087, "learning_rate": 4.883601429740222e-05, "loss": 1.3783, "step": 19011 }, { "epoch": 0.6808602073522302, "grad_norm": 1.7303789854049683, "learning_rate": 4.882604876957032e-05, "loss": 1.667, "step": 19012 }, { "epoch": 0.6808960194817985, "grad_norm": 1.8976233005523682, "learning_rate": 4.881608393021796e-05, "loss": 1.5103, "step": 19013 }, { "epoch": 0.6809318316113667, "grad_norm": 1.9167366027832031, "learning_rate": 4.880611977947909e-05, "loss": 1.4182, "step": 19014 }, { "epoch": 0.6809676437409351, "grad_norm": 1.572396993637085, "learning_rate": 4.879615631748793e-05, "loss": 1.7171, "step": 19015 }, { "epoch": 0.6810034558705034, "grad_norm": 1.5460922718048096, "learning_rate": 4.8786193544378424e-05, "loss": 1.5438, "step": 19016 }, { "epoch": 0.6810392680000716, "grad_norm": 1.4947601556777954, "learning_rate": 4.8776231460284595e-05, "loss": 1.655, "step": 19017 }, { "epoch": 0.6810750801296399, "grad_norm": 1.6060175895690918, "learning_rate": 4.876627006534049e-05, "loss": 1.5582, "step": 19018 }, { "epoch": 0.6811108922592082, "grad_norm": 1.5002024173736572, "learning_rate": 4.8756309359680145e-05, "loss": 1.3706, "step": 19019 }, { "epoch": 0.6811467043887764, "grad_norm": 1.6578645706176758, "learning_rate": 4.874634934343759e-05, "loss": 1.451, "step": 19020 }, { "epoch": 0.6811825165183447, "grad_norm": 1.4593560695648193, "learning_rate": 4.873639001674676e-05, "loss": 1.2581, "step": 19021 }, { "epoch": 0.6812183286479131, "grad_norm": 1.5626795291900635, "learning_rate": 4.872643137974167e-05, "loss": 1.5399, "step": 19022 }, { "epoch": 0.6812541407774814, "grad_norm": 1.6952016353607178, "learning_rate": 4.87164734325563e-05, "loss": 1.4, "step": 19023 }, { "epoch": 0.6812899529070496, "grad_norm": 1.9732818603515625, "learning_rate": 4.870651617532468e-05, "loss": 1.5631, "step": 19024 }, { "epoch": 0.6813257650366179, "grad_norm": 1.7756799459457397, "learning_rate": 4.869655960818068e-05, "loss": 1.2446, "step": 19025 }, { "epoch": 0.6813615771661862, "grad_norm": 1.959659457206726, "learning_rate": 4.868660373125829e-05, "loss": 1.4592, "step": 19026 }, { "epoch": 0.6813973892957544, "grad_norm": 2.5035407543182373, "learning_rate": 4.8676648544691495e-05, "loss": 1.5678, "step": 19027 }, { "epoch": 0.6814332014253227, "grad_norm": 1.5704766511917114, "learning_rate": 4.866669404861416e-05, "loss": 1.5047, "step": 19028 }, { "epoch": 0.6814690135548911, "grad_norm": 2.848478078842163, "learning_rate": 4.8656740243160236e-05, "loss": 1.8893, "step": 19029 }, { "epoch": 0.6815048256844594, "grad_norm": 1.4593943357467651, "learning_rate": 4.864678712846365e-05, "loss": 1.2788, "step": 19030 }, { "epoch": 0.6815406378140276, "grad_norm": 1.6539605855941772, "learning_rate": 4.863683470465833e-05, "loss": 1.2998, "step": 19031 }, { "epoch": 0.6815764499435959, "grad_norm": 1.4724090099334717, "learning_rate": 4.862688297187812e-05, "loss": 1.5732, "step": 19032 }, { "epoch": 0.6816122620731642, "grad_norm": 2.0338592529296875, "learning_rate": 4.8616931930256926e-05, "loss": 1.4792, "step": 19033 }, { "epoch": 0.6816480742027324, "grad_norm": 1.6050416231155396, "learning_rate": 4.860698157992867e-05, "loss": 1.0027, "step": 19034 }, { "epoch": 0.6816838863323007, "grad_norm": 1.8929771184921265, "learning_rate": 4.859703192102715e-05, "loss": 1.3524, "step": 19035 }, { "epoch": 0.6817196984618691, "grad_norm": 1.4023357629776, "learning_rate": 4.858708295368626e-05, "loss": 1.4543, "step": 19036 }, { "epoch": 0.6817555105914374, "grad_norm": 1.6556355953216553, "learning_rate": 4.857713467803985e-05, "loss": 1.6469, "step": 19037 }, { "epoch": 0.6817913227210056, "grad_norm": 2.0519371032714844, "learning_rate": 4.85671870942218e-05, "loss": 1.5793, "step": 19038 }, { "epoch": 0.6818271348505739, "grad_norm": 1.6347476243972778, "learning_rate": 4.855724020236586e-05, "loss": 1.6161, "step": 19039 }, { "epoch": 0.6818629469801422, "grad_norm": 1.3786358833312988, "learning_rate": 4.854729400260591e-05, "loss": 1.5669, "step": 19040 }, { "epoch": 0.6818987591097104, "grad_norm": 1.5543736219406128, "learning_rate": 4.853734849507574e-05, "loss": 1.5422, "step": 19041 }, { "epoch": 0.6819345712392787, "grad_norm": 1.4965298175811768, "learning_rate": 4.8527403679909214e-05, "loss": 1.1551, "step": 19042 }, { "epoch": 0.6819703833688471, "grad_norm": 1.4858630895614624, "learning_rate": 4.851745955724002e-05, "loss": 1.5663, "step": 19043 }, { "epoch": 0.6820061954984153, "grad_norm": 1.4483484029769897, "learning_rate": 4.8507516127202014e-05, "loss": 1.1364, "step": 19044 }, { "epoch": 0.6820420076279836, "grad_norm": 1.929543375968933, "learning_rate": 4.849757338992898e-05, "loss": 1.5264, "step": 19045 }, { "epoch": 0.6820778197575519, "grad_norm": 1.2829221487045288, "learning_rate": 4.848763134555465e-05, "loss": 1.1193, "step": 19046 }, { "epoch": 0.6821136318871202, "grad_norm": 1.5956387519836426, "learning_rate": 4.847768999421277e-05, "loss": 1.2601, "step": 19047 }, { "epoch": 0.6821494440166884, "grad_norm": 2.0086724758148193, "learning_rate": 4.8467749336037124e-05, "loss": 1.1854, "step": 19048 }, { "epoch": 0.6821852561462567, "grad_norm": 1.4452983140945435, "learning_rate": 4.8457809371161476e-05, "loss": 1.3118, "step": 19049 }, { "epoch": 0.6822210682758251, "grad_norm": 1.4968582391738892, "learning_rate": 4.844787009971949e-05, "loss": 1.4051, "step": 19050 }, { "epoch": 0.6822568804053933, "grad_norm": 1.3636406660079956, "learning_rate": 4.8437931521844894e-05, "loss": 1.3358, "step": 19051 }, { "epoch": 0.6822926925349616, "grad_norm": 1.9346957206726074, "learning_rate": 4.8427993637671474e-05, "loss": 1.9677, "step": 19052 }, { "epoch": 0.6823285046645299, "grad_norm": 1.4876221418380737, "learning_rate": 4.841805644733283e-05, "loss": 1.6532, "step": 19053 }, { "epoch": 0.6823643167940981, "grad_norm": 1.683264136314392, "learning_rate": 4.8408119950962704e-05, "loss": 1.6318, "step": 19054 }, { "epoch": 0.6824001289236664, "grad_norm": 1.3608731031417847, "learning_rate": 4.839818414869477e-05, "loss": 1.3656, "step": 19055 }, { "epoch": 0.6824359410532347, "grad_norm": 1.9160677194595337, "learning_rate": 4.8388249040662744e-05, "loss": 1.5936, "step": 19056 }, { "epoch": 0.6824717531828031, "grad_norm": 1.7479875087738037, "learning_rate": 4.8378314627000224e-05, "loss": 1.7994, "step": 19057 }, { "epoch": 0.6825075653123713, "grad_norm": 2.653571367263794, "learning_rate": 4.836838090784088e-05, "loss": 1.5861, "step": 19058 }, { "epoch": 0.6825433774419396, "grad_norm": 1.605258584022522, "learning_rate": 4.835844788331839e-05, "loss": 1.5436, "step": 19059 }, { "epoch": 0.6825791895715079, "grad_norm": 1.4455996751785278, "learning_rate": 4.8348515553566396e-05, "loss": 1.2487, "step": 19060 }, { "epoch": 0.6826150017010761, "grad_norm": 1.330987572669983, "learning_rate": 4.833858391871846e-05, "loss": 1.4588, "step": 19061 }, { "epoch": 0.6826508138306444, "grad_norm": 2.3444483280181885, "learning_rate": 4.832865297890825e-05, "loss": 1.5377, "step": 19062 }, { "epoch": 0.6826866259602127, "grad_norm": 1.7140589952468872, "learning_rate": 4.83187227342694e-05, "loss": 1.2662, "step": 19063 }, { "epoch": 0.6827224380897811, "grad_norm": 1.2725976705551147, "learning_rate": 4.830879318493542e-05, "loss": 1.3755, "step": 19064 }, { "epoch": 0.6827582502193493, "grad_norm": 1.4281116724014282, "learning_rate": 4.829886433103995e-05, "loss": 1.4356, "step": 19065 }, { "epoch": 0.6827940623489176, "grad_norm": 1.4100066423416138, "learning_rate": 4.828893617271658e-05, "loss": 1.3519, "step": 19066 }, { "epoch": 0.6828298744784859, "grad_norm": 1.5357187986373901, "learning_rate": 4.8279008710098916e-05, "loss": 1.6721, "step": 19067 }, { "epoch": 0.6828656866080541, "grad_norm": 1.6073108911514282, "learning_rate": 4.8269081943320424e-05, "loss": 1.4553, "step": 19068 }, { "epoch": 0.6829014987376224, "grad_norm": 1.8977751731872559, "learning_rate": 4.825915587251472e-05, "loss": 1.4387, "step": 19069 }, { "epoch": 0.6829373108671907, "grad_norm": 1.570206642150879, "learning_rate": 4.824923049781536e-05, "loss": 1.2027, "step": 19070 }, { "epoch": 0.682973122996759, "grad_norm": 1.6373735666275024, "learning_rate": 4.8239305819355805e-05, "loss": 1.7076, "step": 19071 }, { "epoch": 0.6830089351263273, "grad_norm": 2.1128671169281006, "learning_rate": 4.822938183726967e-05, "loss": 1.4418, "step": 19072 }, { "epoch": 0.6830447472558956, "grad_norm": 1.6894068717956543, "learning_rate": 4.821945855169035e-05, "loss": 1.7592, "step": 19073 }, { "epoch": 0.6830805593854639, "grad_norm": 1.6356310844421387, "learning_rate": 4.8209535962751494e-05, "loss": 1.4973, "step": 19074 }, { "epoch": 0.6831163715150321, "grad_norm": 1.7310285568237305, "learning_rate": 4.81996140705865e-05, "loss": 1.5372, "step": 19075 }, { "epoch": 0.6831521836446004, "grad_norm": 1.616203784942627, "learning_rate": 4.8189692875328864e-05, "loss": 1.6467, "step": 19076 }, { "epoch": 0.6831879957741687, "grad_norm": 1.7261005640029907, "learning_rate": 4.817977237711213e-05, "loss": 1.5143, "step": 19077 }, { "epoch": 0.683223807903737, "grad_norm": 1.4400568008422852, "learning_rate": 4.816985257606967e-05, "loss": 1.3393, "step": 19078 }, { "epoch": 0.6832596200333053, "grad_norm": 2.898552179336548, "learning_rate": 4.815993347233503e-05, "loss": 1.7106, "step": 19079 }, { "epoch": 0.6832954321628736, "grad_norm": 1.6326524019241333, "learning_rate": 4.8150015066041545e-05, "loss": 1.2989, "step": 19080 }, { "epoch": 0.6833312442924419, "grad_norm": 1.612429141998291, "learning_rate": 4.814009735732279e-05, "loss": 1.4414, "step": 19081 }, { "epoch": 0.6833670564220101, "grad_norm": 1.492590308189392, "learning_rate": 4.8130180346312105e-05, "loss": 1.5981, "step": 19082 }, { "epoch": 0.6834028685515784, "grad_norm": 1.8465263843536377, "learning_rate": 4.812026403314297e-05, "loss": 1.37, "step": 19083 }, { "epoch": 0.6834386806811467, "grad_norm": 1.8145172595977783, "learning_rate": 4.811034841794868e-05, "loss": 1.3823, "step": 19084 }, { "epoch": 0.683474492810715, "grad_norm": 1.4729621410369873, "learning_rate": 4.8100433500862794e-05, "loss": 1.4076, "step": 19085 }, { "epoch": 0.6835103049402833, "grad_norm": 2.7632548809051514, "learning_rate": 4.809051928201864e-05, "loss": 1.2546, "step": 19086 }, { "epoch": 0.6835461170698516, "grad_norm": 1.296805739402771, "learning_rate": 4.808060576154951e-05, "loss": 1.1367, "step": 19087 }, { "epoch": 0.6835819291994198, "grad_norm": 1.7383794784545898, "learning_rate": 4.8070692939588934e-05, "loss": 1.4106, "step": 19088 }, { "epoch": 0.6836177413289881, "grad_norm": 2.4695076942443848, "learning_rate": 4.8060780816270165e-05, "loss": 1.3039, "step": 19089 }, { "epoch": 0.6836535534585564, "grad_norm": 1.2985374927520752, "learning_rate": 4.805086939172663e-05, "loss": 1.5119, "step": 19090 }, { "epoch": 0.6836893655881247, "grad_norm": 1.8521593809127808, "learning_rate": 4.804095866609156e-05, "loss": 1.4941, "step": 19091 }, { "epoch": 0.683725177717693, "grad_norm": 1.4810516834259033, "learning_rate": 4.803104863949844e-05, "loss": 1.5291, "step": 19092 }, { "epoch": 0.6837609898472613, "grad_norm": 1.5847200155258179, "learning_rate": 4.80211393120805e-05, "loss": 1.6587, "step": 19093 }, { "epoch": 0.6837968019768296, "grad_norm": 1.4615356922149658, "learning_rate": 4.801123068397111e-05, "loss": 1.2841, "step": 19094 }, { "epoch": 0.6838326141063978, "grad_norm": 2.2419803142547607, "learning_rate": 4.800132275530351e-05, "loss": 1.2867, "step": 19095 }, { "epoch": 0.6838684262359661, "grad_norm": 1.2413926124572754, "learning_rate": 4.799141552621105e-05, "loss": 1.3555, "step": 19096 }, { "epoch": 0.6839042383655344, "grad_norm": 1.478020191192627, "learning_rate": 4.798150899682704e-05, "loss": 1.4722, "step": 19097 }, { "epoch": 0.6839400504951026, "grad_norm": 1.8806427717208862, "learning_rate": 4.79716031672847e-05, "loss": 1.7017, "step": 19098 }, { "epoch": 0.683975862624671, "grad_norm": 1.7698049545288086, "learning_rate": 4.7961698037717306e-05, "loss": 1.4021, "step": 19099 }, { "epoch": 0.6840116747542393, "grad_norm": 1.7766374349594116, "learning_rate": 4.795179360825815e-05, "loss": 1.5161, "step": 19100 }, { "epoch": 0.6840474868838076, "grad_norm": 1.346203088760376, "learning_rate": 4.794188987904051e-05, "loss": 1.3395, "step": 19101 }, { "epoch": 0.6840832990133758, "grad_norm": 1.899402141571045, "learning_rate": 4.793198685019753e-05, "loss": 1.436, "step": 19102 }, { "epoch": 0.6841191111429441, "grad_norm": 1.439682126045227, "learning_rate": 4.7922084521862565e-05, "loss": 1.3542, "step": 19103 }, { "epoch": 0.6841549232725124, "grad_norm": 1.2503219842910767, "learning_rate": 4.791218289416879e-05, "loss": 1.2213, "step": 19104 }, { "epoch": 0.6841907354020806, "grad_norm": 1.6100847721099854, "learning_rate": 4.790228196724935e-05, "loss": 1.4521, "step": 19105 }, { "epoch": 0.684226547531649, "grad_norm": 1.7780996561050415, "learning_rate": 4.789238174123751e-05, "loss": 1.4513, "step": 19106 }, { "epoch": 0.6842623596612173, "grad_norm": 3.03469181060791, "learning_rate": 4.788248221626647e-05, "loss": 1.3681, "step": 19107 }, { "epoch": 0.6842981717907856, "grad_norm": 1.8146162033081055, "learning_rate": 4.7872583392469436e-05, "loss": 1.3714, "step": 19108 }, { "epoch": 0.6843339839203538, "grad_norm": 1.5532610416412354, "learning_rate": 4.786268526997951e-05, "loss": 1.4951, "step": 19109 }, { "epoch": 0.6843697960499221, "grad_norm": 1.6222275495529175, "learning_rate": 4.7852787848929916e-05, "loss": 1.6106, "step": 19110 }, { "epoch": 0.6844056081794904, "grad_norm": 1.9357314109802246, "learning_rate": 4.7842891129453784e-05, "loss": 1.4879, "step": 19111 }, { "epoch": 0.6844414203090586, "grad_norm": 2.5077223777770996, "learning_rate": 4.783299511168432e-05, "loss": 1.3873, "step": 19112 }, { "epoch": 0.684477232438627, "grad_norm": 2.1032609939575195, "learning_rate": 4.7823099795754566e-05, "loss": 1.42, "step": 19113 }, { "epoch": 0.6845130445681953, "grad_norm": 1.413138508796692, "learning_rate": 4.781320518179772e-05, "loss": 1.5738, "step": 19114 }, { "epoch": 0.6845488566977636, "grad_norm": 1.6410788297653198, "learning_rate": 4.780331126994691e-05, "loss": 1.6905, "step": 19115 }, { "epoch": 0.6845846688273318, "grad_norm": 1.455068826675415, "learning_rate": 4.779341806033517e-05, "loss": 1.2427, "step": 19116 }, { "epoch": 0.6846204809569001, "grad_norm": 1.6743617057800293, "learning_rate": 4.778352555309565e-05, "loss": 1.2889, "step": 19117 }, { "epoch": 0.6846562930864684, "grad_norm": 1.5681772232055664, "learning_rate": 4.777363374836146e-05, "loss": 1.461, "step": 19118 }, { "epoch": 0.6846921052160366, "grad_norm": 1.5241618156433105, "learning_rate": 4.7763742646265674e-05, "loss": 1.6756, "step": 19119 }, { "epoch": 0.684727917345605, "grad_norm": 1.7373355627059937, "learning_rate": 4.7753852246941335e-05, "loss": 1.3074, "step": 19120 }, { "epoch": 0.6847637294751733, "grad_norm": 1.4865138530731201, "learning_rate": 4.774396255052151e-05, "loss": 1.3415, "step": 19121 }, { "epoch": 0.6847995416047415, "grad_norm": 1.9230724573135376, "learning_rate": 4.773407355713929e-05, "loss": 1.6522, "step": 19122 }, { "epoch": 0.6848353537343098, "grad_norm": 1.556607961654663, "learning_rate": 4.7724185266927666e-05, "loss": 1.5193, "step": 19123 }, { "epoch": 0.6848711658638781, "grad_norm": 1.8414274454116821, "learning_rate": 4.7714297680019704e-05, "loss": 1.4167, "step": 19124 }, { "epoch": 0.6849069779934464, "grad_norm": 1.8002582788467407, "learning_rate": 4.770441079654841e-05, "loss": 1.4511, "step": 19125 }, { "epoch": 0.6849427901230146, "grad_norm": 1.7834447622299194, "learning_rate": 4.7694524616646865e-05, "loss": 1.7016, "step": 19126 }, { "epoch": 0.684978602252583, "grad_norm": 1.2345691919326782, "learning_rate": 4.768463914044797e-05, "loss": 1.3703, "step": 19127 }, { "epoch": 0.6850144143821513, "grad_norm": 1.528149127960205, "learning_rate": 4.767475436808478e-05, "loss": 1.4703, "step": 19128 }, { "epoch": 0.6850502265117195, "grad_norm": 1.437954306602478, "learning_rate": 4.766487029969028e-05, "loss": 1.4768, "step": 19129 }, { "epoch": 0.6850860386412878, "grad_norm": 1.682350754737854, "learning_rate": 4.765498693539747e-05, "loss": 1.3122, "step": 19130 }, { "epoch": 0.6851218507708561, "grad_norm": 2.119572401046753, "learning_rate": 4.764510427533926e-05, "loss": 1.4844, "step": 19131 }, { "epoch": 0.6851576629004243, "grad_norm": 2.683337926864624, "learning_rate": 4.763522231964864e-05, "loss": 1.2952, "step": 19132 }, { "epoch": 0.6851934750299926, "grad_norm": 1.712811827659607, "learning_rate": 4.76253410684586e-05, "loss": 1.5528, "step": 19133 }, { "epoch": 0.685229287159561, "grad_norm": 1.5293190479278564, "learning_rate": 4.761546052190199e-05, "loss": 1.2965, "step": 19134 }, { "epoch": 0.6852650992891293, "grad_norm": 1.6144920587539673, "learning_rate": 4.7605580680111785e-05, "loss": 1.313, "step": 19135 }, { "epoch": 0.6853009114186975, "grad_norm": 1.2514257431030273, "learning_rate": 4.7595701543220916e-05, "loss": 1.3082, "step": 19136 }, { "epoch": 0.6853367235482658, "grad_norm": 1.5040967464447021, "learning_rate": 4.758582311136231e-05, "loss": 1.9045, "step": 19137 }, { "epoch": 0.6853725356778341, "grad_norm": 1.5086262226104736, "learning_rate": 4.757594538466883e-05, "loss": 1.5548, "step": 19138 }, { "epoch": 0.6854083478074023, "grad_norm": 1.7702431678771973, "learning_rate": 4.756606836327337e-05, "loss": 1.2219, "step": 19139 }, { "epoch": 0.6854441599369706, "grad_norm": 1.6345676183700562, "learning_rate": 4.755619204730886e-05, "loss": 1.534, "step": 19140 }, { "epoch": 0.685479972066539, "grad_norm": 1.7369307279586792, "learning_rate": 4.75463164369081e-05, "loss": 1.377, "step": 19141 }, { "epoch": 0.6855157841961073, "grad_norm": 2.0801753997802734, "learning_rate": 4.7536441532204e-05, "loss": 1.633, "step": 19142 }, { "epoch": 0.6855515963256755, "grad_norm": 1.7809818983078003, "learning_rate": 4.752656733332941e-05, "loss": 1.6268, "step": 19143 }, { "epoch": 0.6855874084552438, "grad_norm": 2.1582162380218506, "learning_rate": 4.751669384041719e-05, "loss": 1.4487, "step": 19144 }, { "epoch": 0.6856232205848121, "grad_norm": 1.6797009706497192, "learning_rate": 4.750682105360014e-05, "loss": 1.5636, "step": 19145 }, { "epoch": 0.6856590327143803, "grad_norm": 2.4573843479156494, "learning_rate": 4.749694897301108e-05, "loss": 1.4913, "step": 19146 }, { "epoch": 0.6856948448439486, "grad_norm": 1.3981280326843262, "learning_rate": 4.7487077598782856e-05, "loss": 1.392, "step": 19147 }, { "epoch": 0.685730656973517, "grad_norm": 1.4135205745697021, "learning_rate": 4.747720693104831e-05, "loss": 1.4138, "step": 19148 }, { "epoch": 0.6857664691030853, "grad_norm": 1.4483755826950073, "learning_rate": 4.7467336969940156e-05, "loss": 1.1195, "step": 19149 }, { "epoch": 0.6858022812326535, "grad_norm": 1.7879372835159302, "learning_rate": 4.745746771559122e-05, "loss": 1.1896, "step": 19150 }, { "epoch": 0.6858380933622218, "grad_norm": 1.420569896697998, "learning_rate": 4.744759916813432e-05, "loss": 1.5825, "step": 19151 }, { "epoch": 0.6858739054917901, "grad_norm": 1.677254319190979, "learning_rate": 4.743773132770214e-05, "loss": 0.9403, "step": 19152 }, { "epoch": 0.6859097176213583, "grad_norm": 1.9434819221496582, "learning_rate": 4.7427864194427484e-05, "loss": 1.7483, "step": 19153 }, { "epoch": 0.6859455297509266, "grad_norm": 1.6031337976455688, "learning_rate": 4.74179977684431e-05, "loss": 1.3214, "step": 19154 }, { "epoch": 0.685981341880495, "grad_norm": 1.5116249322891235, "learning_rate": 4.740813204988178e-05, "loss": 1.3434, "step": 19155 }, { "epoch": 0.6860171540100632, "grad_norm": 2.093729257583618, "learning_rate": 4.739826703887616e-05, "loss": 1.4053, "step": 19156 }, { "epoch": 0.6860529661396315, "grad_norm": 1.526672124862671, "learning_rate": 4.7388402735559014e-05, "loss": 1.429, "step": 19157 }, { "epoch": 0.6860887782691998, "grad_norm": 1.9945696592330933, "learning_rate": 4.737853914006307e-05, "loss": 1.2103, "step": 19158 }, { "epoch": 0.686124590398768, "grad_norm": 1.6721898317337036, "learning_rate": 4.736867625252097e-05, "loss": 1.1878, "step": 19159 }, { "epoch": 0.6861604025283363, "grad_norm": 1.792447805404663, "learning_rate": 4.735881407306545e-05, "loss": 1.1916, "step": 19160 }, { "epoch": 0.6861962146579046, "grad_norm": 3.155874729156494, "learning_rate": 4.734895260182918e-05, "loss": 1.503, "step": 19161 }, { "epoch": 0.686232026787473, "grad_norm": 1.5070594549179077, "learning_rate": 4.733909183894487e-05, "loss": 1.1776, "step": 19162 }, { "epoch": 0.6862678389170412, "grad_norm": 1.7831507921218872, "learning_rate": 4.732923178454512e-05, "loss": 1.4696, "step": 19163 }, { "epoch": 0.6863036510466095, "grad_norm": 1.283743977546692, "learning_rate": 4.731937243876262e-05, "loss": 1.2818, "step": 19164 }, { "epoch": 0.6863394631761778, "grad_norm": 2.2443976402282715, "learning_rate": 4.730951380173e-05, "loss": 1.6624, "step": 19165 }, { "epoch": 0.686375275305746, "grad_norm": 1.4468728303909302, "learning_rate": 4.729965587357995e-05, "loss": 1.0889, "step": 19166 }, { "epoch": 0.6864110874353143, "grad_norm": 1.9218125343322754, "learning_rate": 4.728979865444505e-05, "loss": 1.3015, "step": 19167 }, { "epoch": 0.6864468995648826, "grad_norm": 1.458131194114685, "learning_rate": 4.7279942144457847e-05, "loss": 1.6985, "step": 19168 }, { "epoch": 0.686482711694451, "grad_norm": 1.8317525386810303, "learning_rate": 4.7270086343751085e-05, "loss": 1.5711, "step": 19169 }, { "epoch": 0.6865185238240192, "grad_norm": 2.029881715774536, "learning_rate": 4.7260231252457265e-05, "loss": 1.7153, "step": 19170 }, { "epoch": 0.6865543359535875, "grad_norm": 1.9590765237808228, "learning_rate": 4.7250376870709e-05, "loss": 1.1425, "step": 19171 }, { "epoch": 0.6865901480831558, "grad_norm": 1.7559995651245117, "learning_rate": 4.7240523198638875e-05, "loss": 1.3486, "step": 19172 }, { "epoch": 0.686625960212724, "grad_norm": 2.3266706466674805, "learning_rate": 4.723067023637949e-05, "loss": 1.6996, "step": 19173 }, { "epoch": 0.6866617723422923, "grad_norm": 1.5278242826461792, "learning_rate": 4.722081798406337e-05, "loss": 1.6506, "step": 19174 }, { "epoch": 0.6866975844718606, "grad_norm": 2.3450798988342285, "learning_rate": 4.721096644182299e-05, "loss": 1.345, "step": 19175 }, { "epoch": 0.686733396601429, "grad_norm": 2.0274198055267334, "learning_rate": 4.720111560979104e-05, "loss": 1.4916, "step": 19176 }, { "epoch": 0.6867692087309972, "grad_norm": 1.3534672260284424, "learning_rate": 4.719126548809993e-05, "loss": 1.4593, "step": 19177 }, { "epoch": 0.6868050208605655, "grad_norm": 1.6133003234863281, "learning_rate": 4.7181416076882266e-05, "loss": 1.4837, "step": 19178 }, { "epoch": 0.6868408329901338, "grad_norm": 1.8048996925354004, "learning_rate": 4.7171567376270443e-05, "loss": 1.5241, "step": 19179 }, { "epoch": 0.686876645119702, "grad_norm": 1.4059399366378784, "learning_rate": 4.716171938639711e-05, "loss": 1.4154, "step": 19180 }, { "epoch": 0.6869124572492703, "grad_norm": 1.9644883871078491, "learning_rate": 4.715187210739466e-05, "loss": 1.2348, "step": 19181 }, { "epoch": 0.6869482693788386, "grad_norm": 2.089815378189087, "learning_rate": 4.714202553939562e-05, "loss": 1.4767, "step": 19182 }, { "epoch": 0.686984081508407, "grad_norm": 1.4828203916549683, "learning_rate": 4.713217968253242e-05, "loss": 1.6034, "step": 19183 }, { "epoch": 0.6870198936379752, "grad_norm": 1.8886553049087524, "learning_rate": 4.712233453693754e-05, "loss": 1.5413, "step": 19184 }, { "epoch": 0.6870557057675435, "grad_norm": 2.037841796875, "learning_rate": 4.711249010274349e-05, "loss": 1.6245, "step": 19185 }, { "epoch": 0.6870915178971118, "grad_norm": 1.8798308372497559, "learning_rate": 4.710264638008258e-05, "loss": 1.6074, "step": 19186 }, { "epoch": 0.68712733002668, "grad_norm": 2.2628753185272217, "learning_rate": 4.709280336908741e-05, "loss": 1.5401, "step": 19187 }, { "epoch": 0.6871631421562483, "grad_norm": 1.5651167631149292, "learning_rate": 4.7082961069890284e-05, "loss": 1.5082, "step": 19188 }, { "epoch": 0.6871989542858166, "grad_norm": 1.807701826095581, "learning_rate": 4.707311948262371e-05, "loss": 1.448, "step": 19189 }, { "epoch": 0.687234766415385, "grad_norm": 1.4807206392288208, "learning_rate": 4.7063278607419944e-05, "loss": 1.4774, "step": 19190 }, { "epoch": 0.6872705785449532, "grad_norm": 1.4907841682434082, "learning_rate": 4.705343844441158e-05, "loss": 1.3822, "step": 19191 }, { "epoch": 0.6873063906745215, "grad_norm": 2.6561944484710693, "learning_rate": 4.704359899373089e-05, "loss": 1.3852, "step": 19192 }, { "epoch": 0.6873422028040898, "grad_norm": 1.906999111175537, "learning_rate": 4.703376025551023e-05, "loss": 1.4777, "step": 19193 }, { "epoch": 0.687378014933658, "grad_norm": 1.8173359632492065, "learning_rate": 4.7023922229882013e-05, "loss": 1.3468, "step": 19194 }, { "epoch": 0.6874138270632263, "grad_norm": 1.8602315187454224, "learning_rate": 4.701408491697859e-05, "loss": 1.5548, "step": 19195 }, { "epoch": 0.6874496391927946, "grad_norm": 1.9973033666610718, "learning_rate": 4.700424831693233e-05, "loss": 1.4175, "step": 19196 }, { "epoch": 0.6874854513223629, "grad_norm": 1.5902940034866333, "learning_rate": 4.699441242987548e-05, "loss": 1.2324, "step": 19197 }, { "epoch": 0.6875212634519312, "grad_norm": 1.6441142559051514, "learning_rate": 4.698457725594052e-05, "loss": 1.7983, "step": 19198 }, { "epoch": 0.6875570755814995, "grad_norm": 1.9269689321517944, "learning_rate": 4.697474279525964e-05, "loss": 1.3264, "step": 19199 }, { "epoch": 0.6875928877110677, "grad_norm": 1.7410550117492676, "learning_rate": 4.6964909047965246e-05, "loss": 1.2364, "step": 19200 }, { "epoch": 0.687628699840636, "grad_norm": 1.4461188316345215, "learning_rate": 4.6955076014189545e-05, "loss": 1.3768, "step": 19201 }, { "epoch": 0.6876645119702043, "grad_norm": 1.9367955923080444, "learning_rate": 4.694524369406488e-05, "loss": 1.7564, "step": 19202 }, { "epoch": 0.6877003240997726, "grad_norm": 2.228534460067749, "learning_rate": 4.693541208772356e-05, "loss": 1.5539, "step": 19203 }, { "epoch": 0.6877361362293409, "grad_norm": 1.685943365097046, "learning_rate": 4.692558119529778e-05, "loss": 1.4786, "step": 19204 }, { "epoch": 0.6877719483589092, "grad_norm": 1.4004324674606323, "learning_rate": 4.691575101691985e-05, "loss": 1.3005, "step": 19205 }, { "epoch": 0.6878077604884775, "grad_norm": 1.7593801021575928, "learning_rate": 4.6905921552722024e-05, "loss": 1.1818, "step": 19206 }, { "epoch": 0.6878435726180457, "grad_norm": 1.8382956981658936, "learning_rate": 4.6896092802836555e-05, "loss": 1.292, "step": 19207 }, { "epoch": 0.687879384747614, "grad_norm": 1.941030502319336, "learning_rate": 4.6886264767395635e-05, "loss": 1.4367, "step": 19208 }, { "epoch": 0.6879151968771823, "grad_norm": 2.078139543533325, "learning_rate": 4.687643744653151e-05, "loss": 1.4142, "step": 19209 }, { "epoch": 0.6879510090067505, "grad_norm": 1.504701852798462, "learning_rate": 4.6866610840376424e-05, "loss": 1.4804, "step": 19210 }, { "epoch": 0.6879868211363188, "grad_norm": 1.9218412637710571, "learning_rate": 4.6856784949062516e-05, "loss": 1.6285, "step": 19211 }, { "epoch": 0.6880226332658872, "grad_norm": 1.6151492595672607, "learning_rate": 4.6846959772722023e-05, "loss": 1.4061, "step": 19212 }, { "epoch": 0.6880584453954555, "grad_norm": 1.6147105693817139, "learning_rate": 4.6837135311487125e-05, "loss": 1.1338, "step": 19213 }, { "epoch": 0.6880942575250237, "grad_norm": 1.5472468137741089, "learning_rate": 4.6827311565490026e-05, "loss": 1.6359, "step": 19214 }, { "epoch": 0.688130069654592, "grad_norm": 1.5259733200073242, "learning_rate": 4.681748853486283e-05, "loss": 1.3292, "step": 19215 }, { "epoch": 0.6881658817841603, "grad_norm": 1.8748228549957275, "learning_rate": 4.6807666219737724e-05, "loss": 1.451, "step": 19216 }, { "epoch": 0.6882016939137285, "grad_norm": 1.626900315284729, "learning_rate": 4.679784462024686e-05, "loss": 1.6105, "step": 19217 }, { "epoch": 0.6882375060432968, "grad_norm": 1.2625377178192139, "learning_rate": 4.6788023736522405e-05, "loss": 1.0924, "step": 19218 }, { "epoch": 0.6882733181728652, "grad_norm": 1.3529118299484253, "learning_rate": 4.677820356869641e-05, "loss": 1.4678, "step": 19219 }, { "epoch": 0.6883091303024335, "grad_norm": 2.293600082397461, "learning_rate": 4.676838411690103e-05, "loss": 1.5526, "step": 19220 }, { "epoch": 0.6883449424320017, "grad_norm": 2.429054021835327, "learning_rate": 4.675856538126843e-05, "loss": 1.4861, "step": 19221 }, { "epoch": 0.68838075456157, "grad_norm": 1.7435716390609741, "learning_rate": 4.674874736193061e-05, "loss": 1.6183, "step": 19222 }, { "epoch": 0.6884165666911383, "grad_norm": 1.8326066732406616, "learning_rate": 4.67389300590197e-05, "loss": 1.3012, "step": 19223 }, { "epoch": 0.6884523788207065, "grad_norm": 1.5112701654434204, "learning_rate": 4.67291134726678e-05, "loss": 1.1489, "step": 19224 }, { "epoch": 0.6884881909502748, "grad_norm": 1.7756952047348022, "learning_rate": 4.6719297603006994e-05, "loss": 1.5206, "step": 19225 }, { "epoch": 0.6885240030798432, "grad_norm": 2.031517744064331, "learning_rate": 4.6709482450169275e-05, "loss": 1.5716, "step": 19226 }, { "epoch": 0.6885598152094115, "grad_norm": 1.7005447149276733, "learning_rate": 4.6699668014286724e-05, "loss": 1.6381, "step": 19227 }, { "epoch": 0.6885956273389797, "grad_norm": 1.2920862436294556, "learning_rate": 4.668985429549143e-05, "loss": 1.6038, "step": 19228 }, { "epoch": 0.688631439468548, "grad_norm": 1.2875529527664185, "learning_rate": 4.6680041293915336e-05, "loss": 1.2111, "step": 19229 }, { "epoch": 0.6886672515981163, "grad_norm": 1.2606359720230103, "learning_rate": 4.6670229009690516e-05, "loss": 1.4477, "step": 19230 }, { "epoch": 0.6887030637276845, "grad_norm": 2.3484201431274414, "learning_rate": 4.666041744294898e-05, "loss": 1.7855, "step": 19231 }, { "epoch": 0.6887388758572528, "grad_norm": 1.6336703300476074, "learning_rate": 4.665060659382274e-05, "loss": 1.1235, "step": 19232 }, { "epoch": 0.6887746879868212, "grad_norm": 2.369983196258545, "learning_rate": 4.664079646244376e-05, "loss": 1.323, "step": 19233 }, { "epoch": 0.6888105001163894, "grad_norm": 1.3482604026794434, "learning_rate": 4.663098704894402e-05, "loss": 1.5943, "step": 19234 }, { "epoch": 0.6888463122459577, "grad_norm": 1.468631386756897, "learning_rate": 4.662117835345552e-05, "loss": 1.4307, "step": 19235 }, { "epoch": 0.688882124375526, "grad_norm": 1.9677053689956665, "learning_rate": 4.661137037611024e-05, "loss": 1.1598, "step": 19236 }, { "epoch": 0.6889179365050943, "grad_norm": 1.6794158220291138, "learning_rate": 4.660156311704007e-05, "loss": 1.7906, "step": 19237 }, { "epoch": 0.6889537486346625, "grad_norm": 1.7666298151016235, "learning_rate": 4.659175657637699e-05, "loss": 1.2263, "step": 19238 }, { "epoch": 0.6889895607642308, "grad_norm": 2.3482303619384766, "learning_rate": 4.658195075425297e-05, "loss": 1.285, "step": 19239 }, { "epoch": 0.6890253728937992, "grad_norm": 1.2648680210113525, "learning_rate": 4.657214565079986e-05, "loss": 1.4305, "step": 19240 }, { "epoch": 0.6890611850233674, "grad_norm": 2.5876047611236572, "learning_rate": 4.656234126614961e-05, "loss": 1.7217, "step": 19241 }, { "epoch": 0.6890969971529357, "grad_norm": 1.4185413122177124, "learning_rate": 4.655253760043413e-05, "loss": 1.5885, "step": 19242 }, { "epoch": 0.689132809282504, "grad_norm": 1.3307768106460571, "learning_rate": 4.654273465378536e-05, "loss": 1.2001, "step": 19243 }, { "epoch": 0.6891686214120722, "grad_norm": 1.5933369398117065, "learning_rate": 4.65329324263351e-05, "loss": 1.2176, "step": 19244 }, { "epoch": 0.6892044335416405, "grad_norm": 1.4384766817092896, "learning_rate": 4.652313091821526e-05, "loss": 1.4176, "step": 19245 }, { "epoch": 0.6892402456712088, "grad_norm": 1.2715067863464355, "learning_rate": 4.651333012955775e-05, "loss": 1.6592, "step": 19246 }, { "epoch": 0.6892760578007772, "grad_norm": 1.4462275505065918, "learning_rate": 4.650353006049436e-05, "loss": 1.4901, "step": 19247 }, { "epoch": 0.6893118699303454, "grad_norm": 1.8860747814178467, "learning_rate": 4.649373071115697e-05, "loss": 1.4205, "step": 19248 }, { "epoch": 0.6893476820599137, "grad_norm": 1.4204922914505005, "learning_rate": 4.6483932081677407e-05, "loss": 1.3774, "step": 19249 }, { "epoch": 0.689383494189482, "grad_norm": 1.5421233177185059, "learning_rate": 4.647413417218756e-05, "loss": 1.0301, "step": 19250 }, { "epoch": 0.6894193063190502, "grad_norm": 1.686322808265686, "learning_rate": 4.646433698281913e-05, "loss": 1.5836, "step": 19251 }, { "epoch": 0.6894551184486185, "grad_norm": 1.406529426574707, "learning_rate": 4.645454051370401e-05, "loss": 1.1692, "step": 19252 }, { "epoch": 0.6894909305781868, "grad_norm": 1.3951549530029297, "learning_rate": 4.644474476497397e-05, "loss": 1.3326, "step": 19253 }, { "epoch": 0.6895267427077552, "grad_norm": 1.7773816585540771, "learning_rate": 4.6434949736760844e-05, "loss": 1.3854, "step": 19254 }, { "epoch": 0.6895625548373234, "grad_norm": 2.3042869567871094, "learning_rate": 4.642515542919635e-05, "loss": 1.5021, "step": 19255 }, { "epoch": 0.6895983669668917, "grad_norm": 1.5629322528839111, "learning_rate": 4.641536184241228e-05, "loss": 1.4215, "step": 19256 }, { "epoch": 0.68963417909646, "grad_norm": 1.878865122795105, "learning_rate": 4.640556897654042e-05, "loss": 1.3999, "step": 19257 }, { "epoch": 0.6896699912260282, "grad_norm": 1.6263047456741333, "learning_rate": 4.639577683171248e-05, "loss": 1.5993, "step": 19258 }, { "epoch": 0.6897058033555965, "grad_norm": 1.5195845365524292, "learning_rate": 4.638598540806021e-05, "loss": 1.4672, "step": 19259 }, { "epoch": 0.6897416154851648, "grad_norm": 1.7965352535247803, "learning_rate": 4.637619470571535e-05, "loss": 1.7661, "step": 19260 }, { "epoch": 0.6897774276147332, "grad_norm": 1.6480170488357544, "learning_rate": 4.636640472480965e-05, "loss": 1.4935, "step": 19261 }, { "epoch": 0.6898132397443014, "grad_norm": 1.973185658454895, "learning_rate": 4.635661546547476e-05, "loss": 1.669, "step": 19262 }, { "epoch": 0.6898490518738697, "grad_norm": 1.6498275995254517, "learning_rate": 4.634682692784241e-05, "loss": 1.2816, "step": 19263 }, { "epoch": 0.689884864003438, "grad_norm": 1.2615090608596802, "learning_rate": 4.6337039112044346e-05, "loss": 1.011, "step": 19264 }, { "epoch": 0.6899206761330062, "grad_norm": 1.4488906860351562, "learning_rate": 4.632725201821215e-05, "loss": 1.014, "step": 19265 }, { "epoch": 0.6899564882625745, "grad_norm": 1.690446138381958, "learning_rate": 4.6317465646477584e-05, "loss": 1.1892, "step": 19266 }, { "epoch": 0.6899923003921428, "grad_norm": 1.2341758012771606, "learning_rate": 4.6307679996972205e-05, "loss": 1.3111, "step": 19267 }, { "epoch": 0.6900281125217111, "grad_norm": 1.505085825920105, "learning_rate": 4.62978950698278e-05, "loss": 1.3969, "step": 19268 }, { "epoch": 0.6900639246512794, "grad_norm": 1.8536823987960815, "learning_rate": 4.6288110865175914e-05, "loss": 1.2291, "step": 19269 }, { "epoch": 0.6900997367808477, "grad_norm": 1.2914131879806519, "learning_rate": 4.627832738314821e-05, "loss": 1.3716, "step": 19270 }, { "epoch": 0.690135548910416, "grad_norm": 1.180411458015442, "learning_rate": 4.6268544623876364e-05, "loss": 1.2842, "step": 19271 }, { "epoch": 0.6901713610399842, "grad_norm": 1.6704957485198975, "learning_rate": 4.625876258749189e-05, "loss": 1.3887, "step": 19272 }, { "epoch": 0.6902071731695525, "grad_norm": 1.6580283641815186, "learning_rate": 4.624898127412649e-05, "loss": 1.4075, "step": 19273 }, { "epoch": 0.6902429852991208, "grad_norm": 2.034627914428711, "learning_rate": 4.623920068391163e-05, "loss": 1.2691, "step": 19274 }, { "epoch": 0.6902787974286891, "grad_norm": 1.591990351676941, "learning_rate": 4.622942081697906e-05, "loss": 1.4743, "step": 19275 }, { "epoch": 0.6903146095582574, "grad_norm": 1.417268991470337, "learning_rate": 4.6219641673460236e-05, "loss": 1.257, "step": 19276 }, { "epoch": 0.6903504216878257, "grad_norm": 1.6440649032592773, "learning_rate": 4.62098632534868e-05, "loss": 1.2822, "step": 19277 }, { "epoch": 0.690386233817394, "grad_norm": 1.5523018836975098, "learning_rate": 4.620008555719019e-05, "loss": 1.3786, "step": 19278 }, { "epoch": 0.6904220459469622, "grad_norm": 1.6315157413482666, "learning_rate": 4.619030858470211e-05, "loss": 1.5197, "step": 19279 }, { "epoch": 0.6904578580765305, "grad_norm": 1.3777469396591187, "learning_rate": 4.6180532336154014e-05, "loss": 1.1425, "step": 19280 }, { "epoch": 0.6904936702060988, "grad_norm": 1.4311867952346802, "learning_rate": 4.617075681167736e-05, "loss": 1.2902, "step": 19281 }, { "epoch": 0.6905294823356671, "grad_norm": 1.3356856107711792, "learning_rate": 4.616098201140382e-05, "loss": 1.4217, "step": 19282 }, { "epoch": 0.6905652944652354, "grad_norm": 1.3175742626190186, "learning_rate": 4.615120793546478e-05, "loss": 1.2804, "step": 19283 }, { "epoch": 0.6906011065948037, "grad_norm": 1.5983734130859375, "learning_rate": 4.6141434583991803e-05, "loss": 1.4278, "step": 19284 }, { "epoch": 0.6906369187243719, "grad_norm": 1.3159353733062744, "learning_rate": 4.613166195711629e-05, "loss": 1.3914, "step": 19285 }, { "epoch": 0.6906727308539402, "grad_norm": 1.8552560806274414, "learning_rate": 4.612189005496985e-05, "loss": 1.2788, "step": 19286 }, { "epoch": 0.6907085429835085, "grad_norm": 1.4018633365631104, "learning_rate": 4.611211887768384e-05, "loss": 1.4291, "step": 19287 }, { "epoch": 0.6907443551130767, "grad_norm": 1.557361364364624, "learning_rate": 4.6102348425389804e-05, "loss": 1.2867, "step": 19288 }, { "epoch": 0.6907801672426451, "grad_norm": 1.355259895324707, "learning_rate": 4.609257869821911e-05, "loss": 1.1086, "step": 19289 }, { "epoch": 0.6908159793722134, "grad_norm": 1.498028039932251, "learning_rate": 4.608280969630323e-05, "loss": 1.7323, "step": 19290 }, { "epoch": 0.6908517915017817, "grad_norm": 1.7910292148590088, "learning_rate": 4.6073041419773635e-05, "loss": 1.2225, "step": 19291 }, { "epoch": 0.6908876036313499, "grad_norm": 1.6466988325119019, "learning_rate": 4.606327386876167e-05, "loss": 1.3962, "step": 19292 }, { "epoch": 0.6909234157609182, "grad_norm": 1.5631616115570068, "learning_rate": 4.605350704339879e-05, "loss": 1.4526, "step": 19293 }, { "epoch": 0.6909592278904865, "grad_norm": 2.4088470935821533, "learning_rate": 4.604374094381637e-05, "loss": 1.8184, "step": 19294 }, { "epoch": 0.6909950400200547, "grad_norm": 1.8684895038604736, "learning_rate": 4.603397557014587e-05, "loss": 1.5012, "step": 19295 }, { "epoch": 0.6910308521496231, "grad_norm": 1.6042251586914062, "learning_rate": 4.602421092251854e-05, "loss": 1.2943, "step": 19296 }, { "epoch": 0.6910666642791914, "grad_norm": 2.03073787689209, "learning_rate": 4.60144470010659e-05, "loss": 1.9343, "step": 19297 }, { "epoch": 0.6911024764087597, "grad_norm": 1.6332614421844482, "learning_rate": 4.600468380591923e-05, "loss": 1.4177, "step": 19298 }, { "epoch": 0.6911382885383279, "grad_norm": 1.823080062866211, "learning_rate": 4.599492133720986e-05, "loss": 1.283, "step": 19299 }, { "epoch": 0.6911741006678962, "grad_norm": 2.0920989513397217, "learning_rate": 4.598515959506917e-05, "loss": 1.3483, "step": 19300 }, { "epoch": 0.6912099127974645, "grad_norm": 1.4750959873199463, "learning_rate": 4.597539857962848e-05, "loss": 1.3174, "step": 19301 }, { "epoch": 0.6912457249270327, "grad_norm": 1.3776130676269531, "learning_rate": 4.5965638291019145e-05, "loss": 1.525, "step": 19302 }, { "epoch": 0.6912815370566011, "grad_norm": 1.7445027828216553, "learning_rate": 4.595587872937241e-05, "loss": 1.5717, "step": 19303 }, { "epoch": 0.6913173491861694, "grad_norm": 1.7076929807662964, "learning_rate": 4.594611989481963e-05, "loss": 1.1559, "step": 19304 }, { "epoch": 0.6913531613157377, "grad_norm": 1.4765623807907104, "learning_rate": 4.593636178749206e-05, "loss": 1.6233, "step": 19305 }, { "epoch": 0.6913889734453059, "grad_norm": 1.6138931512832642, "learning_rate": 4.592660440752107e-05, "loss": 1.3967, "step": 19306 }, { "epoch": 0.6914247855748742, "grad_norm": 1.3620359897613525, "learning_rate": 4.5916847755037806e-05, "loss": 1.349, "step": 19307 }, { "epoch": 0.6914605977044425, "grad_norm": 1.8544646501541138, "learning_rate": 4.590709183017361e-05, "loss": 1.5375, "step": 19308 }, { "epoch": 0.6914964098340107, "grad_norm": 1.3989589214324951, "learning_rate": 4.5897336633059737e-05, "loss": 1.3476, "step": 19309 }, { "epoch": 0.6915322219635791, "grad_norm": 2.901787281036377, "learning_rate": 4.588758216382739e-05, "loss": 1.6608, "step": 19310 }, { "epoch": 0.6915680340931474, "grad_norm": 1.9451749324798584, "learning_rate": 4.5877828422607824e-05, "loss": 1.7291, "step": 19311 }, { "epoch": 0.6916038462227156, "grad_norm": 1.8403728008270264, "learning_rate": 4.5868075409532265e-05, "loss": 1.2411, "step": 19312 }, { "epoch": 0.6916396583522839, "grad_norm": 1.5571632385253906, "learning_rate": 4.585832312473196e-05, "loss": 1.6115, "step": 19313 }, { "epoch": 0.6916754704818522, "grad_norm": 2.0901341438293457, "learning_rate": 4.584857156833804e-05, "loss": 1.6583, "step": 19314 }, { "epoch": 0.6917112826114205, "grad_norm": 1.7531592845916748, "learning_rate": 4.583882074048174e-05, "loss": 1.2701, "step": 19315 }, { "epoch": 0.6917470947409887, "grad_norm": 1.3582504987716675, "learning_rate": 4.582907064129428e-05, "loss": 1.3205, "step": 19316 }, { "epoch": 0.6917829068705571, "grad_norm": 1.857133150100708, "learning_rate": 4.5819321270906765e-05, "loss": 1.5085, "step": 19317 }, { "epoch": 0.6918187190001254, "grad_norm": 2.7562215328216553, "learning_rate": 4.580957262945039e-05, "loss": 1.4333, "step": 19318 }, { "epoch": 0.6918545311296936, "grad_norm": 2.172489881515503, "learning_rate": 4.5799824717056325e-05, "loss": 1.6725, "step": 19319 }, { "epoch": 0.6918903432592619, "grad_norm": 1.492525577545166, "learning_rate": 4.579007753385573e-05, "loss": 1.4247, "step": 19320 }, { "epoch": 0.6919261553888302, "grad_norm": 1.6404074430465698, "learning_rate": 4.578033107997969e-05, "loss": 1.5811, "step": 19321 }, { "epoch": 0.6919619675183984, "grad_norm": 1.5491911172866821, "learning_rate": 4.577058535555935e-05, "loss": 1.5538, "step": 19322 }, { "epoch": 0.6919977796479667, "grad_norm": 1.4528319835662842, "learning_rate": 4.576084036072584e-05, "loss": 1.2544, "step": 19323 }, { "epoch": 0.6920335917775351, "grad_norm": 1.7799615859985352, "learning_rate": 4.575109609561029e-05, "loss": 1.4805, "step": 19324 }, { "epoch": 0.6920694039071034, "grad_norm": 1.4052472114562988, "learning_rate": 4.5741352560343734e-05, "loss": 1.364, "step": 19325 }, { "epoch": 0.6921052160366716, "grad_norm": 1.4462934732437134, "learning_rate": 4.5731609755057284e-05, "loss": 1.5724, "step": 19326 }, { "epoch": 0.6921410281662399, "grad_norm": 2.7505440711975098, "learning_rate": 4.572186767988206e-05, "loss": 1.4352, "step": 19327 }, { "epoch": 0.6921768402958082, "grad_norm": 2.1405277252197266, "learning_rate": 4.571212633494906e-05, "loss": 1.8836, "step": 19328 }, { "epoch": 0.6922126524253764, "grad_norm": 1.584587574005127, "learning_rate": 4.5702385720389376e-05, "loss": 1.2718, "step": 19329 }, { "epoch": 0.6922484645549447, "grad_norm": 1.6535595655441284, "learning_rate": 4.569264583633405e-05, "loss": 1.3507, "step": 19330 }, { "epoch": 0.6922842766845131, "grad_norm": 1.9386780261993408, "learning_rate": 4.568290668291416e-05, "loss": 1.345, "step": 19331 }, { "epoch": 0.6923200888140814, "grad_norm": 1.418664813041687, "learning_rate": 4.567316826026066e-05, "loss": 1.1447, "step": 19332 }, { "epoch": 0.6923559009436496, "grad_norm": 1.544927716255188, "learning_rate": 4.5663430568504603e-05, "loss": 1.5124, "step": 19333 }, { "epoch": 0.6923917130732179, "grad_norm": 1.3872967958450317, "learning_rate": 4.565369360777704e-05, "loss": 1.6032, "step": 19334 }, { "epoch": 0.6924275252027862, "grad_norm": 1.791256308555603, "learning_rate": 4.564395737820888e-05, "loss": 1.7009, "step": 19335 }, { "epoch": 0.6924633373323544, "grad_norm": 1.7304942607879639, "learning_rate": 4.563422187993117e-05, "loss": 1.3953, "step": 19336 }, { "epoch": 0.6924991494619227, "grad_norm": 1.745445966720581, "learning_rate": 4.5624487113074874e-05, "loss": 1.4433, "step": 19337 }, { "epoch": 0.6925349615914911, "grad_norm": 1.7737088203430176, "learning_rate": 4.5614753077771e-05, "loss": 1.6636, "step": 19338 }, { "epoch": 0.6925707737210594, "grad_norm": 1.6347934007644653, "learning_rate": 4.560501977415044e-05, "loss": 1.6366, "step": 19339 }, { "epoch": 0.6926065858506276, "grad_norm": 1.919651985168457, "learning_rate": 4.5595287202344175e-05, "loss": 1.5327, "step": 19340 }, { "epoch": 0.6926423979801959, "grad_norm": 1.5812537670135498, "learning_rate": 4.558555536248313e-05, "loss": 1.6632, "step": 19341 }, { "epoch": 0.6926782101097642, "grad_norm": 1.4711294174194336, "learning_rate": 4.55758242546983e-05, "loss": 1.4774, "step": 19342 }, { "epoch": 0.6927140222393324, "grad_norm": 1.3836665153503418, "learning_rate": 4.5566093879120505e-05, "loss": 1.2167, "step": 19343 }, { "epoch": 0.6927498343689007, "grad_norm": 1.8161534070968628, "learning_rate": 4.555636423588071e-05, "loss": 1.4929, "step": 19344 }, { "epoch": 0.6927856464984691, "grad_norm": 1.7006677389144897, "learning_rate": 4.5546635325109844e-05, "loss": 1.1194, "step": 19345 }, { "epoch": 0.6928214586280373, "grad_norm": 1.4193700551986694, "learning_rate": 4.553690714693872e-05, "loss": 1.4037, "step": 19346 }, { "epoch": 0.6928572707576056, "grad_norm": 1.9114856719970703, "learning_rate": 4.5527179701498256e-05, "loss": 1.4374, "step": 19347 }, { "epoch": 0.6928930828871739, "grad_norm": 1.6580359935760498, "learning_rate": 4.551745298891933e-05, "loss": 1.3423, "step": 19348 }, { "epoch": 0.6929288950167422, "grad_norm": 1.9556456804275513, "learning_rate": 4.5507727009332824e-05, "loss": 1.2934, "step": 19349 }, { "epoch": 0.6929647071463104, "grad_norm": 2.0172038078308105, "learning_rate": 4.549800176286954e-05, "loss": 1.599, "step": 19350 }, { "epoch": 0.6930005192758787, "grad_norm": 2.1281747817993164, "learning_rate": 4.5488277249660325e-05, "loss": 1.6343, "step": 19351 }, { "epoch": 0.6930363314054471, "grad_norm": 1.7381844520568848, "learning_rate": 4.5478553469836064e-05, "loss": 1.2599, "step": 19352 }, { "epoch": 0.6930721435350153, "grad_norm": 2.3697657585144043, "learning_rate": 4.546883042352751e-05, "loss": 1.7014, "step": 19353 }, { "epoch": 0.6931079556645836, "grad_norm": 2.5228118896484375, "learning_rate": 4.545910811086549e-05, "loss": 1.4948, "step": 19354 }, { "epoch": 0.6931437677941519, "grad_norm": 1.3983299732208252, "learning_rate": 4.544938653198082e-05, "loss": 1.4429, "step": 19355 }, { "epoch": 0.6931795799237201, "grad_norm": 1.7777349948883057, "learning_rate": 4.543966568700433e-05, "loss": 1.4876, "step": 19356 }, { "epoch": 0.6932153920532884, "grad_norm": 1.5651066303253174, "learning_rate": 4.542994557606672e-05, "loss": 1.683, "step": 19357 }, { "epoch": 0.6932512041828567, "grad_norm": 1.167218565940857, "learning_rate": 4.542022619929881e-05, "loss": 1.5015, "step": 19358 }, { "epoch": 0.6932870163124251, "grad_norm": 1.1746838092803955, "learning_rate": 4.541050755683135e-05, "loss": 1.4014, "step": 19359 }, { "epoch": 0.6933228284419933, "grad_norm": 1.9746357202529907, "learning_rate": 4.5400789648795136e-05, "loss": 1.5701, "step": 19360 }, { "epoch": 0.6933586405715616, "grad_norm": 2.039637804031372, "learning_rate": 4.539107247532086e-05, "loss": 1.6741, "step": 19361 }, { "epoch": 0.6933944527011299, "grad_norm": 1.581687331199646, "learning_rate": 4.5381356036539204e-05, "loss": 1.2782, "step": 19362 }, { "epoch": 0.6934302648306981, "grad_norm": 1.605271577835083, "learning_rate": 4.537164033258101e-05, "loss": 1.582, "step": 19363 }, { "epoch": 0.6934660769602664, "grad_norm": 1.3678474426269531, "learning_rate": 4.53619253635769e-05, "loss": 1.5372, "step": 19364 }, { "epoch": 0.6935018890898347, "grad_norm": 1.3803857564926147, "learning_rate": 4.5352211129657596e-05, "loss": 1.2848, "step": 19365 }, { "epoch": 0.6935377012194031, "grad_norm": 1.4274797439575195, "learning_rate": 4.5342497630953806e-05, "loss": 1.5293, "step": 19366 }, { "epoch": 0.6935735133489713, "grad_norm": 1.4727002382278442, "learning_rate": 4.533278486759625e-05, "loss": 1.2364, "step": 19367 }, { "epoch": 0.6936093254785396, "grad_norm": 1.8014189004898071, "learning_rate": 4.5323072839715555e-05, "loss": 1.4292, "step": 19368 }, { "epoch": 0.6936451376081079, "grad_norm": 1.8191452026367188, "learning_rate": 4.531336154744231e-05, "loss": 1.4423, "step": 19369 }, { "epoch": 0.6936809497376761, "grad_norm": 1.5207409858703613, "learning_rate": 4.530365099090732e-05, "loss": 1.5638, "step": 19370 }, { "epoch": 0.6937167618672444, "grad_norm": 1.5536220073699951, "learning_rate": 4.5293941170241116e-05, "loss": 1.5002, "step": 19371 }, { "epoch": 0.6937525739968127, "grad_norm": 2.320056200027466, "learning_rate": 4.528423208557441e-05, "loss": 1.3389, "step": 19372 }, { "epoch": 0.693788386126381, "grad_norm": 1.6876623630523682, "learning_rate": 4.52745237370377e-05, "loss": 1.2901, "step": 19373 }, { "epoch": 0.6938241982559493, "grad_norm": 1.3508429527282715, "learning_rate": 4.526481612476176e-05, "loss": 1.3113, "step": 19374 }, { "epoch": 0.6938600103855176, "grad_norm": 1.5658550262451172, "learning_rate": 4.525510924887707e-05, "loss": 1.5774, "step": 19375 }, { "epoch": 0.6938958225150859, "grad_norm": 1.841926097869873, "learning_rate": 4.524540310951432e-05, "loss": 1.3422, "step": 19376 }, { "epoch": 0.6939316346446541, "grad_norm": 2.058666467666626, "learning_rate": 4.5235697706804e-05, "loss": 1.4251, "step": 19377 }, { "epoch": 0.6939674467742224, "grad_norm": 1.5321693420410156, "learning_rate": 4.5225993040876724e-05, "loss": 1.5993, "step": 19378 }, { "epoch": 0.6940032589037907, "grad_norm": 1.959938645362854, "learning_rate": 4.521628911186311e-05, "loss": 1.6419, "step": 19379 }, { "epoch": 0.694039071033359, "grad_norm": 1.6654266119003296, "learning_rate": 4.5206585919893563e-05, "loss": 1.5531, "step": 19380 }, { "epoch": 0.6940748831629273, "grad_norm": 2.1616580486297607, "learning_rate": 4.519688346509881e-05, "loss": 1.935, "step": 19381 }, { "epoch": 0.6941106952924956, "grad_norm": 1.5678056478500366, "learning_rate": 4.5187181747609265e-05, "loss": 1.4482, "step": 19382 }, { "epoch": 0.6941465074220639, "grad_norm": 1.2296350002288818, "learning_rate": 4.5177480767555525e-05, "loss": 1.4928, "step": 19383 }, { "epoch": 0.6941823195516321, "grad_norm": 1.5905427932739258, "learning_rate": 4.516778052506798e-05, "loss": 1.4104, "step": 19384 }, { "epoch": 0.6942181316812004, "grad_norm": 2.694613456726074, "learning_rate": 4.5158081020277296e-05, "loss": 1.6461, "step": 19385 }, { "epoch": 0.6942539438107687, "grad_norm": 1.8506790399551392, "learning_rate": 4.5148382253313904e-05, "loss": 1.4123, "step": 19386 }, { "epoch": 0.694289755940337, "grad_norm": 1.9660435914993286, "learning_rate": 4.513868422430823e-05, "loss": 1.4974, "step": 19387 }, { "epoch": 0.6943255680699053, "grad_norm": 1.9675049781799316, "learning_rate": 4.5128986933390785e-05, "loss": 1.1586, "step": 19388 }, { "epoch": 0.6943613801994736, "grad_norm": 2.04257869720459, "learning_rate": 4.5119290380692046e-05, "loss": 1.4606, "step": 19389 }, { "epoch": 0.6943971923290418, "grad_norm": 1.488710641860962, "learning_rate": 4.5109594566342515e-05, "loss": 1.605, "step": 19390 }, { "epoch": 0.6944330044586101, "grad_norm": 1.9784034490585327, "learning_rate": 4.50998994904725e-05, "loss": 1.1938, "step": 19391 }, { "epoch": 0.6944688165881784, "grad_norm": 1.4609148502349854, "learning_rate": 4.50902051532126e-05, "loss": 1.236, "step": 19392 }, { "epoch": 0.6945046287177467, "grad_norm": 4.472887992858887, "learning_rate": 4.508051155469312e-05, "loss": 1.8098, "step": 19393 }, { "epoch": 0.694540440847315, "grad_norm": 1.397859811782837, "learning_rate": 4.5070818695044545e-05, "loss": 1.4173, "step": 19394 }, { "epoch": 0.6945762529768833, "grad_norm": 1.3701924085617065, "learning_rate": 4.5061126574397225e-05, "loss": 1.1701, "step": 19395 }, { "epoch": 0.6946120651064516, "grad_norm": 1.419206976890564, "learning_rate": 4.505143519288159e-05, "loss": 1.4127, "step": 19396 }, { "epoch": 0.6946478772360198, "grad_norm": 1.7711102962493896, "learning_rate": 4.504174455062803e-05, "loss": 1.4287, "step": 19397 }, { "epoch": 0.6946836893655881, "grad_norm": 1.9416470527648926, "learning_rate": 4.503205464776689e-05, "loss": 1.193, "step": 19398 }, { "epoch": 0.6947195014951564, "grad_norm": 1.6379601955413818, "learning_rate": 4.502236548442853e-05, "loss": 1.1867, "step": 19399 }, { "epoch": 0.6947553136247246, "grad_norm": 1.5109336376190186, "learning_rate": 4.501267706074335e-05, "loss": 1.0608, "step": 19400 }, { "epoch": 0.694791125754293, "grad_norm": 1.4156792163848877, "learning_rate": 4.5002989376841684e-05, "loss": 1.2618, "step": 19401 }, { "epoch": 0.6948269378838613, "grad_norm": 1.7646799087524414, "learning_rate": 4.499330243285383e-05, "loss": 1.5062, "step": 19402 }, { "epoch": 0.6948627500134296, "grad_norm": 1.45806884765625, "learning_rate": 4.4983616228910144e-05, "loss": 1.4862, "step": 19403 }, { "epoch": 0.6948985621429978, "grad_norm": 2.211923837661743, "learning_rate": 4.497393076514097e-05, "loss": 1.3923, "step": 19404 }, { "epoch": 0.6949343742725661, "grad_norm": 1.3924918174743652, "learning_rate": 4.496424604167654e-05, "loss": 1.6712, "step": 19405 }, { "epoch": 0.6949701864021344, "grad_norm": 1.4601891040802002, "learning_rate": 4.4954562058647187e-05, "loss": 1.4383, "step": 19406 }, { "epoch": 0.6950059985317026, "grad_norm": 1.7488446235656738, "learning_rate": 4.49448788161832e-05, "loss": 1.5504, "step": 19407 }, { "epoch": 0.695041810661271, "grad_norm": 3.262617349624634, "learning_rate": 4.493519631441488e-05, "loss": 1.5263, "step": 19408 }, { "epoch": 0.6950776227908393, "grad_norm": 1.4852958917617798, "learning_rate": 4.492551455347245e-05, "loss": 1.5381, "step": 19409 }, { "epoch": 0.6951134349204076, "grad_norm": 1.5856446027755737, "learning_rate": 4.491583353348616e-05, "loss": 1.4643, "step": 19410 }, { "epoch": 0.6951492470499758, "grad_norm": 2.515482187271118, "learning_rate": 4.490615325458628e-05, "loss": 1.7184, "step": 19411 }, { "epoch": 0.6951850591795441, "grad_norm": 1.6691640615463257, "learning_rate": 4.4896473716903085e-05, "loss": 1.5105, "step": 19412 }, { "epoch": 0.6952208713091124, "grad_norm": 1.558199405670166, "learning_rate": 4.488679492056672e-05, "loss": 1.3742, "step": 19413 }, { "epoch": 0.6952566834386806, "grad_norm": 1.819074034690857, "learning_rate": 4.487711686570744e-05, "loss": 1.3235, "step": 19414 }, { "epoch": 0.695292495568249, "grad_norm": 2.806718587875366, "learning_rate": 4.4867439552455485e-05, "loss": 1.4652, "step": 19415 }, { "epoch": 0.6953283076978173, "grad_norm": 1.7254616022109985, "learning_rate": 4.4857762980940974e-05, "loss": 1.3435, "step": 19416 }, { "epoch": 0.6953641198273856, "grad_norm": 1.4609277248382568, "learning_rate": 4.484808715129414e-05, "loss": 1.6299, "step": 19417 }, { "epoch": 0.6953999319569538, "grad_norm": 2.2493855953216553, "learning_rate": 4.483841206364514e-05, "loss": 1.7297, "step": 19418 }, { "epoch": 0.6954357440865221, "grad_norm": 1.397451639175415, "learning_rate": 4.4828737718124204e-05, "loss": 1.3422, "step": 19419 }, { "epoch": 0.6954715562160904, "grad_norm": 1.560224175453186, "learning_rate": 4.481906411486139e-05, "loss": 1.0808, "step": 19420 }, { "epoch": 0.6955073683456586, "grad_norm": 1.8856585025787354, "learning_rate": 4.480939125398689e-05, "loss": 1.556, "step": 19421 }, { "epoch": 0.695543180475227, "grad_norm": 1.7038389444351196, "learning_rate": 4.479971913563088e-05, "loss": 1.2636, "step": 19422 }, { "epoch": 0.6955789926047953, "grad_norm": 1.789746642112732, "learning_rate": 4.4790047759923406e-05, "loss": 1.4515, "step": 19423 }, { "epoch": 0.6956148047343635, "grad_norm": 1.6814122200012207, "learning_rate": 4.478037712699463e-05, "loss": 1.6073, "step": 19424 }, { "epoch": 0.6956506168639318, "grad_norm": 1.5029243230819702, "learning_rate": 4.477070723697464e-05, "loss": 1.3639, "step": 19425 }, { "epoch": 0.6956864289935001, "grad_norm": 1.991762638092041, "learning_rate": 4.476103808999359e-05, "loss": 1.288, "step": 19426 }, { "epoch": 0.6957222411230684, "grad_norm": 1.5815709829330444, "learning_rate": 4.475136968618149e-05, "loss": 1.4307, "step": 19427 }, { "epoch": 0.6957580532526366, "grad_norm": 1.5167605876922607, "learning_rate": 4.474170202566843e-05, "loss": 1.3207, "step": 19428 }, { "epoch": 0.695793865382205, "grad_norm": 1.501737356185913, "learning_rate": 4.47320351085845e-05, "loss": 1.1018, "step": 19429 }, { "epoch": 0.6958296775117733, "grad_norm": 1.815674901008606, "learning_rate": 4.472236893505978e-05, "loss": 1.4576, "step": 19430 }, { "epoch": 0.6958654896413415, "grad_norm": 1.786224603652954, "learning_rate": 4.471270350522424e-05, "loss": 1.2835, "step": 19431 }, { "epoch": 0.6959013017709098, "grad_norm": 1.9200912714004517, "learning_rate": 4.4703038819207975e-05, "loss": 1.4545, "step": 19432 }, { "epoch": 0.6959371139004781, "grad_norm": 2.014310359954834, "learning_rate": 4.4693374877141015e-05, "loss": 1.459, "step": 19433 }, { "epoch": 0.6959729260300463, "grad_norm": 1.2802836894989014, "learning_rate": 4.4683711679153325e-05, "loss": 1.4649, "step": 19434 }, { "epoch": 0.6960087381596146, "grad_norm": 1.7526559829711914, "learning_rate": 4.467404922537495e-05, "loss": 1.3993, "step": 19435 }, { "epoch": 0.696044550289183, "grad_norm": 1.5115348100662231, "learning_rate": 4.466438751593587e-05, "loss": 1.4426, "step": 19436 }, { "epoch": 0.6960803624187513, "grad_norm": 2.1051738262176514, "learning_rate": 4.465472655096611e-05, "loss": 1.542, "step": 19437 }, { "epoch": 0.6961161745483195, "grad_norm": 1.828957438468933, "learning_rate": 4.464506633059559e-05, "loss": 1.3994, "step": 19438 }, { "epoch": 0.6961519866778878, "grad_norm": 1.8577128648757935, "learning_rate": 4.463540685495429e-05, "loss": 1.5155, "step": 19439 }, { "epoch": 0.6961877988074561, "grad_norm": 1.6411844491958618, "learning_rate": 4.4625748124172204e-05, "loss": 1.7316, "step": 19440 }, { "epoch": 0.6962236109370243, "grad_norm": 1.6481902599334717, "learning_rate": 4.461609013837923e-05, "loss": 1.1623, "step": 19441 }, { "epoch": 0.6962594230665926, "grad_norm": 1.815705418586731, "learning_rate": 4.460643289770532e-05, "loss": 1.5606, "step": 19442 }, { "epoch": 0.696295235196161, "grad_norm": 2.0571019649505615, "learning_rate": 4.4596776402280396e-05, "loss": 1.4951, "step": 19443 }, { "epoch": 0.6963310473257293, "grad_norm": 1.345685362815857, "learning_rate": 4.458712065223442e-05, "loss": 1.5385, "step": 19444 }, { "epoch": 0.6963668594552975, "grad_norm": 2.47963285446167, "learning_rate": 4.4577465647697223e-05, "loss": 1.3012, "step": 19445 }, { "epoch": 0.6964026715848658, "grad_norm": 1.4152905941009521, "learning_rate": 4.456781138879873e-05, "loss": 1.0918, "step": 19446 }, { "epoch": 0.6964384837144341, "grad_norm": 1.7984778881072998, "learning_rate": 4.455815787566884e-05, "loss": 1.6954, "step": 19447 }, { "epoch": 0.6964742958440023, "grad_norm": 1.517166018486023, "learning_rate": 4.454850510843745e-05, "loss": 1.5157, "step": 19448 }, { "epoch": 0.6965101079735706, "grad_norm": 1.8566745519638062, "learning_rate": 4.45388530872344e-05, "loss": 1.1924, "step": 19449 }, { "epoch": 0.696545920103139, "grad_norm": 1.6976255178451538, "learning_rate": 4.452920181218947e-05, "loss": 1.5863, "step": 19450 }, { "epoch": 0.6965817322327073, "grad_norm": 1.6378042697906494, "learning_rate": 4.451955128343266e-05, "loss": 1.5929, "step": 19451 }, { "epoch": 0.6966175443622755, "grad_norm": 1.7423259019851685, "learning_rate": 4.450990150109367e-05, "loss": 1.5762, "step": 19452 }, { "epoch": 0.6966533564918438, "grad_norm": 1.9417845010757446, "learning_rate": 4.4500252465302384e-05, "loss": 1.7302, "step": 19453 }, { "epoch": 0.6966891686214121, "grad_norm": 1.3983720541000366, "learning_rate": 4.449060417618861e-05, "loss": 1.582, "step": 19454 }, { "epoch": 0.6967249807509803, "grad_norm": 1.4114207029342651, "learning_rate": 4.44809566338822e-05, "loss": 1.5144, "step": 19455 }, { "epoch": 0.6967607928805486, "grad_norm": 1.3205265998840332, "learning_rate": 4.447130983851285e-05, "loss": 1.6001, "step": 19456 }, { "epoch": 0.696796605010117, "grad_norm": 1.3919918537139893, "learning_rate": 4.446166379021042e-05, "loss": 1.5457, "step": 19457 }, { "epoch": 0.6968324171396852, "grad_norm": 1.9750583171844482, "learning_rate": 4.4452018489104684e-05, "loss": 1.4017, "step": 19458 }, { "epoch": 0.6968682292692535, "grad_norm": 1.6426039934158325, "learning_rate": 4.4442373935325364e-05, "loss": 1.3513, "step": 19459 }, { "epoch": 0.6969040413988218, "grad_norm": 1.6933245658874512, "learning_rate": 4.4432730129002265e-05, "loss": 1.542, "step": 19460 }, { "epoch": 0.69693985352839, "grad_norm": 1.6183847188949585, "learning_rate": 4.442308707026504e-05, "loss": 1.3853, "step": 19461 }, { "epoch": 0.6969756656579583, "grad_norm": 1.628896713256836, "learning_rate": 4.4413444759243564e-05, "loss": 1.4663, "step": 19462 }, { "epoch": 0.6970114777875266, "grad_norm": 1.992426872253418, "learning_rate": 4.440380319606744e-05, "loss": 2.0699, "step": 19463 }, { "epoch": 0.697047289917095, "grad_norm": 1.5472890138626099, "learning_rate": 4.439416238086643e-05, "loss": 1.3658, "step": 19464 }, { "epoch": 0.6970831020466632, "grad_norm": 1.2758383750915527, "learning_rate": 4.438452231377025e-05, "loss": 1.4012, "step": 19465 }, { "epoch": 0.6971189141762315, "grad_norm": 1.9764435291290283, "learning_rate": 4.4374882994908615e-05, "loss": 1.5115, "step": 19466 }, { "epoch": 0.6971547263057998, "grad_norm": 1.4551410675048828, "learning_rate": 4.436524442441118e-05, "loss": 1.1521, "step": 19467 }, { "epoch": 0.697190538435368, "grad_norm": 1.883821964263916, "learning_rate": 4.435560660240754e-05, "loss": 1.1386, "step": 19468 }, { "epoch": 0.6972263505649363, "grad_norm": 1.3271210193634033, "learning_rate": 4.434596952902752e-05, "loss": 1.5645, "step": 19469 }, { "epoch": 0.6972621626945046, "grad_norm": 1.4583014249801636, "learning_rate": 4.433633320440064e-05, "loss": 1.4722, "step": 19470 }, { "epoch": 0.697297974824073, "grad_norm": 1.2590141296386719, "learning_rate": 4.432669762865664e-05, "loss": 1.6447, "step": 19471 }, { "epoch": 0.6973337869536412, "grad_norm": 1.6529279947280884, "learning_rate": 4.431706280192503e-05, "loss": 1.2192, "step": 19472 }, { "epoch": 0.6973695990832095, "grad_norm": 1.5791985988616943, "learning_rate": 4.4307428724335595e-05, "loss": 1.4652, "step": 19473 }, { "epoch": 0.6974054112127778, "grad_norm": 1.7167974710464478, "learning_rate": 4.429779539601787e-05, "loss": 1.4894, "step": 19474 }, { "epoch": 0.697441223342346, "grad_norm": 2.156100273132324, "learning_rate": 4.428816281710142e-05, "loss": 1.4869, "step": 19475 }, { "epoch": 0.6974770354719143, "grad_norm": 1.4207909107208252, "learning_rate": 4.427853098771587e-05, "loss": 1.5364, "step": 19476 }, { "epoch": 0.6975128476014826, "grad_norm": 1.4815733432769775, "learning_rate": 4.426889990799082e-05, "loss": 1.434, "step": 19477 }, { "epoch": 0.697548659731051, "grad_norm": 1.5822980403900146, "learning_rate": 4.425926957805586e-05, "loss": 0.9941, "step": 19478 }, { "epoch": 0.6975844718606192, "grad_norm": 1.9064446687698364, "learning_rate": 4.424963999804046e-05, "loss": 1.3855, "step": 19479 }, { "epoch": 0.6976202839901875, "grad_norm": 2.002734422683716, "learning_rate": 4.4240011168074315e-05, "loss": 1.2231, "step": 19480 }, { "epoch": 0.6976560961197558, "grad_norm": 1.4305377006530762, "learning_rate": 4.423038308828685e-05, "loss": 1.5345, "step": 19481 }, { "epoch": 0.697691908249324, "grad_norm": 1.573127031326294, "learning_rate": 4.4220755758807695e-05, "loss": 1.2691, "step": 19482 }, { "epoch": 0.6977277203788923, "grad_norm": 1.9930295944213867, "learning_rate": 4.421112917976628e-05, "loss": 1.402, "step": 19483 }, { "epoch": 0.6977635325084606, "grad_norm": 1.787060022354126, "learning_rate": 4.420150335129215e-05, "loss": 1.0692, "step": 19484 }, { "epoch": 0.697799344638029, "grad_norm": 1.6993510723114014, "learning_rate": 4.419187827351485e-05, "loss": 1.5472, "step": 19485 }, { "epoch": 0.6978351567675972, "grad_norm": 1.5781887769699097, "learning_rate": 4.418225394656382e-05, "loss": 1.5185, "step": 19486 }, { "epoch": 0.6978709688971655, "grad_norm": 2.3409838676452637, "learning_rate": 4.417263037056856e-05, "loss": 1.5516, "step": 19487 }, { "epoch": 0.6979067810267338, "grad_norm": 1.6964595317840576, "learning_rate": 4.416300754565854e-05, "loss": 1.6612, "step": 19488 }, { "epoch": 0.697942593156302, "grad_norm": 1.3110731840133667, "learning_rate": 4.415338547196326e-05, "loss": 1.2798, "step": 19489 }, { "epoch": 0.6979784052858703, "grad_norm": 1.5717277526855469, "learning_rate": 4.414376414961208e-05, "loss": 1.5264, "step": 19490 }, { "epoch": 0.6980142174154386, "grad_norm": 1.8437398672103882, "learning_rate": 4.4134143578734576e-05, "loss": 1.4216, "step": 19491 }, { "epoch": 0.698050029545007, "grad_norm": 1.6376128196716309, "learning_rate": 4.41245237594601e-05, "loss": 1.3332, "step": 19492 }, { "epoch": 0.6980858416745752, "grad_norm": 1.9958080053329468, "learning_rate": 4.411490469191806e-05, "loss": 1.8033, "step": 19493 }, { "epoch": 0.6981216538041435, "grad_norm": 1.52802574634552, "learning_rate": 4.4105286376237874e-05, "loss": 1.495, "step": 19494 }, { "epoch": 0.6981574659337118, "grad_norm": 1.652212142944336, "learning_rate": 4.409566881254897e-05, "loss": 1.5077, "step": 19495 }, { "epoch": 0.69819327806328, "grad_norm": 1.9880157709121704, "learning_rate": 4.408605200098077e-05, "loss": 1.7005, "step": 19496 }, { "epoch": 0.6982290901928483, "grad_norm": 1.7847472429275513, "learning_rate": 4.407643594166257e-05, "loss": 1.4118, "step": 19497 }, { "epoch": 0.6982649023224166, "grad_norm": 1.8568692207336426, "learning_rate": 4.4066820634723805e-05, "loss": 1.539, "step": 19498 }, { "epoch": 0.6983007144519849, "grad_norm": 1.3895604610443115, "learning_rate": 4.405720608029381e-05, "loss": 1.5283, "step": 19499 }, { "epoch": 0.6983365265815532, "grad_norm": 1.667009949684143, "learning_rate": 4.404759227850198e-05, "loss": 1.3788, "step": 19500 }, { "epoch": 0.6983723387111215, "grad_norm": 1.8755829334259033, "learning_rate": 4.403797922947759e-05, "loss": 1.5928, "step": 19501 }, { "epoch": 0.6984081508406897, "grad_norm": 1.7361255884170532, "learning_rate": 4.4028366933349996e-05, "loss": 1.3978, "step": 19502 }, { "epoch": 0.698443962970258, "grad_norm": 1.4138158559799194, "learning_rate": 4.4018755390248566e-05, "loss": 1.3812, "step": 19503 }, { "epoch": 0.6984797750998263, "grad_norm": 1.610290765762329, "learning_rate": 4.400914460030254e-05, "loss": 1.708, "step": 19504 }, { "epoch": 0.6985155872293946, "grad_norm": 1.9601976871490479, "learning_rate": 4.3999534563641253e-05, "loss": 1.4285, "step": 19505 }, { "epoch": 0.6985513993589629, "grad_norm": 1.4728658199310303, "learning_rate": 4.3989925280393986e-05, "loss": 1.671, "step": 19506 }, { "epoch": 0.6985872114885312, "grad_norm": 1.6432338953018188, "learning_rate": 4.3980316750690065e-05, "loss": 1.3902, "step": 19507 }, { "epoch": 0.6986230236180995, "grad_norm": 1.4036003351211548, "learning_rate": 4.397070897465869e-05, "loss": 1.653, "step": 19508 }, { "epoch": 0.6986588357476677, "grad_norm": 1.8093888759613037, "learning_rate": 4.396110195242915e-05, "loss": 1.6891, "step": 19509 }, { "epoch": 0.698694647877236, "grad_norm": 1.9804387092590332, "learning_rate": 4.395149568413073e-05, "loss": 1.5497, "step": 19510 }, { "epoch": 0.6987304600068043, "grad_norm": 1.2982250452041626, "learning_rate": 4.394189016989261e-05, "loss": 1.3489, "step": 19511 }, { "epoch": 0.6987662721363725, "grad_norm": 1.9783587455749512, "learning_rate": 4.3932285409844046e-05, "loss": 1.4654, "step": 19512 }, { "epoch": 0.6988020842659409, "grad_norm": 2.4322969913482666, "learning_rate": 4.392268140411425e-05, "loss": 1.55, "step": 19513 }, { "epoch": 0.6988378963955092, "grad_norm": 2.154332160949707, "learning_rate": 4.391307815283249e-05, "loss": 1.8838, "step": 19514 }, { "epoch": 0.6988737085250775, "grad_norm": 1.7512561082839966, "learning_rate": 4.390347565612787e-05, "loss": 1.6804, "step": 19515 }, { "epoch": 0.6989095206546457, "grad_norm": 1.7126457691192627, "learning_rate": 4.3893873914129635e-05, "loss": 1.0483, "step": 19516 }, { "epoch": 0.698945332784214, "grad_norm": 1.6176780462265015, "learning_rate": 4.388427292696695e-05, "loss": 1.2061, "step": 19517 }, { "epoch": 0.6989811449137823, "grad_norm": 1.496763825416565, "learning_rate": 4.387467269476902e-05, "loss": 1.4142, "step": 19518 }, { "epoch": 0.6990169570433505, "grad_norm": 2.7602691650390625, "learning_rate": 4.3865073217664944e-05, "loss": 1.5354, "step": 19519 }, { "epoch": 0.6990527691729189, "grad_norm": 2.1753737926483154, "learning_rate": 4.38554744957839e-05, "loss": 1.5896, "step": 19520 }, { "epoch": 0.6990885813024872, "grad_norm": 2.356755018234253, "learning_rate": 4.384587652925506e-05, "loss": 1.5859, "step": 19521 }, { "epoch": 0.6991243934320555, "grad_norm": 1.905924677848816, "learning_rate": 4.383627931820747e-05, "loss": 1.5317, "step": 19522 }, { "epoch": 0.6991602055616237, "grad_norm": 2.0932862758636475, "learning_rate": 4.382668286277031e-05, "loss": 1.7417, "step": 19523 }, { "epoch": 0.699196017691192, "grad_norm": 1.5198270082473755, "learning_rate": 4.381708716307267e-05, "loss": 1.3734, "step": 19524 }, { "epoch": 0.6992318298207603, "grad_norm": 1.43095862865448, "learning_rate": 4.3807492219243686e-05, "loss": 1.4217, "step": 19525 }, { "epoch": 0.6992676419503285, "grad_norm": 2.038198947906494, "learning_rate": 4.379789803141238e-05, "loss": 1.6064, "step": 19526 }, { "epoch": 0.6993034540798969, "grad_norm": 1.9979199171066284, "learning_rate": 4.378830459970785e-05, "loss": 1.5887, "step": 19527 }, { "epoch": 0.6993392662094652, "grad_norm": 1.6111292839050293, "learning_rate": 4.3778711924259216e-05, "loss": 1.5713, "step": 19528 }, { "epoch": 0.6993750783390335, "grad_norm": 1.396235704421997, "learning_rate": 4.3769120005195465e-05, "loss": 1.1932, "step": 19529 }, { "epoch": 0.6994108904686017, "grad_norm": 1.9758912324905396, "learning_rate": 4.375952884264566e-05, "loss": 1.4702, "step": 19530 }, { "epoch": 0.69944670259817, "grad_norm": 1.5198663473129272, "learning_rate": 4.374993843673886e-05, "loss": 1.1155, "step": 19531 }, { "epoch": 0.6994825147277383, "grad_norm": 1.6794745922088623, "learning_rate": 4.37403487876041e-05, "loss": 1.3603, "step": 19532 }, { "epoch": 0.6995183268573065, "grad_norm": 1.6363730430603027, "learning_rate": 4.373075989537035e-05, "loss": 1.4035, "step": 19533 }, { "epoch": 0.6995541389868749, "grad_norm": 1.9012279510498047, "learning_rate": 4.372117176016665e-05, "loss": 1.3656, "step": 19534 }, { "epoch": 0.6995899511164432, "grad_norm": 1.7077008485794067, "learning_rate": 4.371158438212199e-05, "loss": 1.2935, "step": 19535 }, { "epoch": 0.6996257632460114, "grad_norm": 1.684689998626709, "learning_rate": 4.370199776136538e-05, "loss": 1.5655, "step": 19536 }, { "epoch": 0.6996615753755797, "grad_norm": 2.581475019454956, "learning_rate": 4.3692411898025746e-05, "loss": 1.4285, "step": 19537 }, { "epoch": 0.699697387505148, "grad_norm": 2.067708730697632, "learning_rate": 4.368282679223207e-05, "loss": 1.7469, "step": 19538 }, { "epoch": 0.6997331996347163, "grad_norm": 1.2996182441711426, "learning_rate": 4.367324244411335e-05, "loss": 1.0087, "step": 19539 }, { "epoch": 0.6997690117642845, "grad_norm": 1.6188222169876099, "learning_rate": 4.3663658853798476e-05, "loss": 1.4346, "step": 19540 }, { "epoch": 0.6998048238938529, "grad_norm": 1.531131386756897, "learning_rate": 4.365407602141639e-05, "loss": 1.5322, "step": 19541 }, { "epoch": 0.6998406360234212, "grad_norm": 2.2801246643066406, "learning_rate": 4.364449394709603e-05, "loss": 1.7024, "step": 19542 }, { "epoch": 0.6998764481529894, "grad_norm": 2.3423590660095215, "learning_rate": 4.363491263096635e-05, "loss": 1.7841, "step": 19543 }, { "epoch": 0.6999122602825577, "grad_norm": 1.4121369123458862, "learning_rate": 4.362533207315618e-05, "loss": 1.5293, "step": 19544 }, { "epoch": 0.699948072412126, "grad_norm": 2.1785571575164795, "learning_rate": 4.361575227379444e-05, "loss": 1.5964, "step": 19545 }, { "epoch": 0.6999838845416942, "grad_norm": 1.5997748374938965, "learning_rate": 4.360617323301007e-05, "loss": 1.5425, "step": 19546 }, { "epoch": 0.7000196966712625, "grad_norm": 2.7810516357421875, "learning_rate": 4.359659495093186e-05, "loss": 1.6532, "step": 19547 }, { "epoch": 0.7000555088008309, "grad_norm": 1.4571328163146973, "learning_rate": 4.35870174276887e-05, "loss": 1.472, "step": 19548 }, { "epoch": 0.7000913209303992, "grad_norm": 2.0664026737213135, "learning_rate": 4.357744066340946e-05, "loss": 1.59, "step": 19549 }, { "epoch": 0.7001271330599674, "grad_norm": 1.6976768970489502, "learning_rate": 4.356786465822301e-05, "loss": 1.6835, "step": 19550 }, { "epoch": 0.7001629451895357, "grad_norm": 1.4122880697250366, "learning_rate": 4.3558289412258114e-05, "loss": 1.3165, "step": 19551 }, { "epoch": 0.700198757319104, "grad_norm": 1.928450584411621, "learning_rate": 4.354871492564363e-05, "loss": 1.4275, "step": 19552 }, { "epoch": 0.7002345694486722, "grad_norm": 1.8557058572769165, "learning_rate": 4.353914119850837e-05, "loss": 1.0927, "step": 19553 }, { "epoch": 0.7002703815782405, "grad_norm": 1.8503695726394653, "learning_rate": 4.3529568230981165e-05, "loss": 1.1398, "step": 19554 }, { "epoch": 0.7003061937078089, "grad_norm": 1.4184296131134033, "learning_rate": 4.351999602319079e-05, "loss": 1.5463, "step": 19555 }, { "epoch": 0.7003420058373772, "grad_norm": 2.062753200531006, "learning_rate": 4.351042457526594e-05, "loss": 1.2678, "step": 19556 }, { "epoch": 0.7003778179669454, "grad_norm": 1.8937575817108154, "learning_rate": 4.350085388733553e-05, "loss": 1.1757, "step": 19557 }, { "epoch": 0.7004136300965137, "grad_norm": 1.4495995044708252, "learning_rate": 4.349128395952821e-05, "loss": 1.4901, "step": 19558 }, { "epoch": 0.700449442226082, "grad_norm": 1.6838757991790771, "learning_rate": 4.3481714791972816e-05, "loss": 1.8831, "step": 19559 }, { "epoch": 0.7004852543556502, "grad_norm": 1.3522682189941406, "learning_rate": 4.3472146384797973e-05, "loss": 1.381, "step": 19560 }, { "epoch": 0.7005210664852185, "grad_norm": 1.5966675281524658, "learning_rate": 4.3462578738132557e-05, "loss": 1.2488, "step": 19561 }, { "epoch": 0.7005568786147869, "grad_norm": 1.5900689363479614, "learning_rate": 4.345301185210517e-05, "loss": 1.3429, "step": 19562 }, { "epoch": 0.7005926907443552, "grad_norm": 2.1359074115753174, "learning_rate": 4.344344572684459e-05, "loss": 1.5794, "step": 19563 }, { "epoch": 0.7006285028739234, "grad_norm": 1.512988567352295, "learning_rate": 4.343388036247952e-05, "loss": 1.2947, "step": 19564 }, { "epoch": 0.7006643150034917, "grad_norm": 1.4352556467056274, "learning_rate": 4.342431575913858e-05, "loss": 1.2725, "step": 19565 }, { "epoch": 0.70070012713306, "grad_norm": 1.7753974199295044, "learning_rate": 4.341475191695054e-05, "loss": 1.6684, "step": 19566 }, { "epoch": 0.7007359392626282, "grad_norm": 1.5111207962036133, "learning_rate": 4.340518883604395e-05, "loss": 1.3503, "step": 19567 }, { "epoch": 0.7007717513921965, "grad_norm": 1.4870178699493408, "learning_rate": 4.339562651654761e-05, "loss": 1.176, "step": 19568 }, { "epoch": 0.7008075635217649, "grad_norm": 1.6527915000915527, "learning_rate": 4.338606495859007e-05, "loss": 1.715, "step": 19569 }, { "epoch": 0.7008433756513331, "grad_norm": 2.162165403366089, "learning_rate": 4.3376504162300035e-05, "loss": 1.7323, "step": 19570 }, { "epoch": 0.7008791877809014, "grad_norm": 1.7663673162460327, "learning_rate": 4.336694412780605e-05, "loss": 1.3105, "step": 19571 }, { "epoch": 0.7009149999104697, "grad_norm": 1.6505602598190308, "learning_rate": 4.3357384855236796e-05, "loss": 1.3928, "step": 19572 }, { "epoch": 0.700950812040038, "grad_norm": 1.5301345586776733, "learning_rate": 4.33478263447209e-05, "loss": 1.2207, "step": 19573 }, { "epoch": 0.7009866241696062, "grad_norm": 1.6930309534072876, "learning_rate": 4.333826859638684e-05, "loss": 1.603, "step": 19574 }, { "epoch": 0.7010224362991745, "grad_norm": 1.439517855644226, "learning_rate": 4.332871161036337e-05, "loss": 1.0671, "step": 19575 }, { "epoch": 0.7010582484287429, "grad_norm": 1.5984947681427002, "learning_rate": 4.331915538677894e-05, "loss": 1.4029, "step": 19576 }, { "epoch": 0.7010940605583111, "grad_norm": 1.5472118854522705, "learning_rate": 4.3309599925762214e-05, "loss": 1.2994, "step": 19577 }, { "epoch": 0.7011298726878794, "grad_norm": 1.2958440780639648, "learning_rate": 4.330004522744161e-05, "loss": 1.6099, "step": 19578 }, { "epoch": 0.7011656848174477, "grad_norm": 1.5507088899612427, "learning_rate": 4.329049129194583e-05, "loss": 1.4012, "step": 19579 }, { "epoch": 0.701201496947016, "grad_norm": 2.030113935470581, "learning_rate": 4.3280938119403346e-05, "loss": 1.4628, "step": 19580 }, { "epoch": 0.7012373090765842, "grad_norm": 1.508935570716858, "learning_rate": 4.3271385709942636e-05, "loss": 1.3467, "step": 19581 }, { "epoch": 0.7012731212061525, "grad_norm": 1.4302729368209839, "learning_rate": 4.326183406369226e-05, "loss": 1.3545, "step": 19582 }, { "epoch": 0.7013089333357209, "grad_norm": 2.1700620651245117, "learning_rate": 4.325228318078073e-05, "loss": 1.4485, "step": 19583 }, { "epoch": 0.7013447454652891, "grad_norm": 1.713890790939331, "learning_rate": 4.324273306133655e-05, "loss": 1.7539, "step": 19584 }, { "epoch": 0.7013805575948574, "grad_norm": 1.60768723487854, "learning_rate": 4.3233183705488156e-05, "loss": 1.1099, "step": 19585 }, { "epoch": 0.7014163697244257, "grad_norm": 2.0066118240356445, "learning_rate": 4.322363511336405e-05, "loss": 1.4433, "step": 19586 }, { "epoch": 0.7014521818539939, "grad_norm": 2.282560110092163, "learning_rate": 4.32140872850927e-05, "loss": 1.2975, "step": 19587 }, { "epoch": 0.7014879939835622, "grad_norm": 1.573010802268982, "learning_rate": 4.320454022080259e-05, "loss": 1.4643, "step": 19588 }, { "epoch": 0.7015238061131305, "grad_norm": 1.5037258863449097, "learning_rate": 4.3194993920622095e-05, "loss": 1.5382, "step": 19589 }, { "epoch": 0.7015596182426989, "grad_norm": 1.8554325103759766, "learning_rate": 4.318544838467968e-05, "loss": 1.5909, "step": 19590 }, { "epoch": 0.7015954303722671, "grad_norm": 2.168818950653076, "learning_rate": 4.3175903613103815e-05, "loss": 1.6751, "step": 19591 }, { "epoch": 0.7016312425018354, "grad_norm": 1.7133216857910156, "learning_rate": 4.316635960602283e-05, "loss": 1.2098, "step": 19592 }, { "epoch": 0.7016670546314037, "grad_norm": 1.6379142999649048, "learning_rate": 4.3156816363565166e-05, "loss": 1.4609, "step": 19593 }, { "epoch": 0.7017028667609719, "grad_norm": 1.4387273788452148, "learning_rate": 4.3147273885859215e-05, "loss": 1.1765, "step": 19594 }, { "epoch": 0.7017386788905402, "grad_norm": 1.2745956182479858, "learning_rate": 4.3137732173033394e-05, "loss": 1.553, "step": 19595 }, { "epoch": 0.7017744910201085, "grad_norm": 1.6397004127502441, "learning_rate": 4.3128191225216005e-05, "loss": 1.2392, "step": 19596 }, { "epoch": 0.7018103031496769, "grad_norm": 1.4887009859085083, "learning_rate": 4.3118651042535444e-05, "loss": 1.3739, "step": 19597 }, { "epoch": 0.7018461152792451, "grad_norm": 1.4074066877365112, "learning_rate": 4.31091116251201e-05, "loss": 1.4146, "step": 19598 }, { "epoch": 0.7018819274088134, "grad_norm": 1.663396954536438, "learning_rate": 4.3099572973098236e-05, "loss": 1.4598, "step": 19599 }, { "epoch": 0.7019177395383817, "grad_norm": 2.8375847339630127, "learning_rate": 4.309003508659822e-05, "loss": 1.3936, "step": 19600 }, { "epoch": 0.7019535516679499, "grad_norm": 1.628321886062622, "learning_rate": 4.3080497965748376e-05, "loss": 1.7446, "step": 19601 }, { "epoch": 0.7019893637975182, "grad_norm": 1.359993815422058, "learning_rate": 4.307096161067704e-05, "loss": 1.1517, "step": 19602 }, { "epoch": 0.7020251759270865, "grad_norm": 1.599370002746582, "learning_rate": 4.3061426021512453e-05, "loss": 1.2006, "step": 19603 }, { "epoch": 0.7020609880566548, "grad_norm": 1.3849345445632935, "learning_rate": 4.305189119838293e-05, "loss": 1.2563, "step": 19604 }, { "epoch": 0.7020968001862231, "grad_norm": 1.7541930675506592, "learning_rate": 4.304235714141677e-05, "loss": 1.5481, "step": 19605 }, { "epoch": 0.7021326123157914, "grad_norm": 1.3335884809494019, "learning_rate": 4.303282385074224e-05, "loss": 1.4315, "step": 19606 }, { "epoch": 0.7021684244453597, "grad_norm": 1.3472892045974731, "learning_rate": 4.3023291326487556e-05, "loss": 1.4085, "step": 19607 }, { "epoch": 0.7022042365749279, "grad_norm": 1.5135165452957153, "learning_rate": 4.301375956878099e-05, "loss": 1.0491, "step": 19608 }, { "epoch": 0.7022400487044962, "grad_norm": 2.763059616088867, "learning_rate": 4.300422857775081e-05, "loss": 1.5307, "step": 19609 }, { "epoch": 0.7022758608340645, "grad_norm": 1.5471773147583008, "learning_rate": 4.2994698353525184e-05, "loss": 1.5784, "step": 19610 }, { "epoch": 0.7023116729636327, "grad_norm": 1.618385910987854, "learning_rate": 4.298516889623238e-05, "loss": 1.3744, "step": 19611 }, { "epoch": 0.7023474850932011, "grad_norm": 2.3037314414978027, "learning_rate": 4.297564020600056e-05, "loss": 1.6599, "step": 19612 }, { "epoch": 0.7023832972227694, "grad_norm": 2.060870885848999, "learning_rate": 4.2966112282957985e-05, "loss": 1.4654, "step": 19613 }, { "epoch": 0.7024191093523376, "grad_norm": 1.506044864654541, "learning_rate": 4.295658512723277e-05, "loss": 1.1635, "step": 19614 }, { "epoch": 0.7024549214819059, "grad_norm": 1.5053657293319702, "learning_rate": 4.2947058738953115e-05, "loss": 1.5164, "step": 19615 }, { "epoch": 0.7024907336114742, "grad_norm": 1.444291591644287, "learning_rate": 4.293753311824724e-05, "loss": 1.4666, "step": 19616 }, { "epoch": 0.7025265457410425, "grad_norm": 1.5131818056106567, "learning_rate": 4.2928008265243205e-05, "loss": 1.1814, "step": 19617 }, { "epoch": 0.7025623578706107, "grad_norm": 1.3681418895721436, "learning_rate": 4.2918484180069205e-05, "loss": 1.2736, "step": 19618 }, { "epoch": 0.7025981700001791, "grad_norm": 1.5410593748092651, "learning_rate": 4.290896086285338e-05, "loss": 1.0753, "step": 19619 }, { "epoch": 0.7026339821297474, "grad_norm": 1.949423909187317, "learning_rate": 4.289943831372386e-05, "loss": 1.4629, "step": 19620 }, { "epoch": 0.7026697942593156, "grad_norm": 1.885960578918457, "learning_rate": 4.2889916532808716e-05, "loss": 1.6269, "step": 19621 }, { "epoch": 0.7027056063888839, "grad_norm": 1.5690264701843262, "learning_rate": 4.2880395520236086e-05, "loss": 0.934, "step": 19622 }, { "epoch": 0.7027414185184522, "grad_norm": 1.42863130569458, "learning_rate": 4.287087527613405e-05, "loss": 1.5016, "step": 19623 }, { "epoch": 0.7027772306480204, "grad_norm": 1.8398011922836304, "learning_rate": 4.2861355800630734e-05, "loss": 1.6303, "step": 19624 }, { "epoch": 0.7028130427775887, "grad_norm": 1.50557541847229, "learning_rate": 4.285183709385413e-05, "loss": 1.5899, "step": 19625 }, { "epoch": 0.7028488549071571, "grad_norm": 2.3311352729797363, "learning_rate": 4.284231915593234e-05, "loss": 1.3451, "step": 19626 }, { "epoch": 0.7028846670367254, "grad_norm": 1.7532374858856201, "learning_rate": 4.283280198699346e-05, "loss": 1.5832, "step": 19627 }, { "epoch": 0.7029204791662936, "grad_norm": 1.79032564163208, "learning_rate": 4.2823285587165454e-05, "loss": 1.2493, "step": 19628 }, { "epoch": 0.7029562912958619, "grad_norm": 2.0720388889312744, "learning_rate": 4.281376995657638e-05, "loss": 1.8434, "step": 19629 }, { "epoch": 0.7029921034254302, "grad_norm": 1.5829167366027832, "learning_rate": 4.2804255095354276e-05, "loss": 1.6059, "step": 19630 }, { "epoch": 0.7030279155549984, "grad_norm": 1.614844799041748, "learning_rate": 4.279474100362717e-05, "loss": 1.6384, "step": 19631 }, { "epoch": 0.7030637276845667, "grad_norm": 1.5533795356750488, "learning_rate": 4.278522768152301e-05, "loss": 1.5463, "step": 19632 }, { "epoch": 0.7030995398141351, "grad_norm": 1.7275530099868774, "learning_rate": 4.27757151291698e-05, "loss": 1.2183, "step": 19633 }, { "epoch": 0.7031353519437034, "grad_norm": 1.5814635753631592, "learning_rate": 4.2766203346695565e-05, "loss": 1.5156, "step": 19634 }, { "epoch": 0.7031711640732716, "grad_norm": 2.4060182571411133, "learning_rate": 4.27566923342282e-05, "loss": 1.5844, "step": 19635 }, { "epoch": 0.7032069762028399, "grad_norm": 2.1266350746154785, "learning_rate": 4.274718209189571e-05, "loss": 1.4377, "step": 19636 }, { "epoch": 0.7032427883324082, "grad_norm": 1.3293077945709229, "learning_rate": 4.273767261982603e-05, "loss": 1.4234, "step": 19637 }, { "epoch": 0.7032786004619764, "grad_norm": 1.726370930671692, "learning_rate": 4.272816391814714e-05, "loss": 1.6574, "step": 19638 }, { "epoch": 0.7033144125915447, "grad_norm": 1.4099525213241577, "learning_rate": 4.271865598698689e-05, "loss": 1.0711, "step": 19639 }, { "epoch": 0.7033502247211131, "grad_norm": 1.8294678926467896, "learning_rate": 4.2709148826473234e-05, "loss": 1.6613, "step": 19640 }, { "epoch": 0.7033860368506814, "grad_norm": 1.2897287607192993, "learning_rate": 4.269964243673408e-05, "loss": 1.2772, "step": 19641 }, { "epoch": 0.7034218489802496, "grad_norm": 1.5318269729614258, "learning_rate": 4.2690136817897363e-05, "loss": 1.2968, "step": 19642 }, { "epoch": 0.7034576611098179, "grad_norm": 1.5041943788528442, "learning_rate": 4.2680631970090935e-05, "loss": 1.3391, "step": 19643 }, { "epoch": 0.7034934732393862, "grad_norm": 1.3494664430618286, "learning_rate": 4.2671127893442586e-05, "loss": 1.594, "step": 19644 }, { "epoch": 0.7035292853689544, "grad_norm": 1.539660930633545, "learning_rate": 4.266162458808034e-05, "loss": 1.4438, "step": 19645 }, { "epoch": 0.7035650974985227, "grad_norm": 1.8535983562469482, "learning_rate": 4.2652122054131936e-05, "loss": 1.7111, "step": 19646 }, { "epoch": 0.7036009096280911, "grad_norm": 1.5027899742126465, "learning_rate": 4.264262029172527e-05, "loss": 1.62, "step": 19647 }, { "epoch": 0.7036367217576593, "grad_norm": 2.0121660232543945, "learning_rate": 4.2633119300988146e-05, "loss": 1.4439, "step": 19648 }, { "epoch": 0.7036725338872276, "grad_norm": 1.5586117506027222, "learning_rate": 4.262361908204844e-05, "loss": 1.2805, "step": 19649 }, { "epoch": 0.7037083460167959, "grad_norm": 1.7597720623016357, "learning_rate": 4.26141196350339e-05, "loss": 1.2432, "step": 19650 }, { "epoch": 0.7037441581463642, "grad_norm": 1.6648684740066528, "learning_rate": 4.260462096007235e-05, "loss": 1.3918, "step": 19651 }, { "epoch": 0.7037799702759324, "grad_norm": 1.5515859127044678, "learning_rate": 4.259512305729164e-05, "loss": 1.4651, "step": 19652 }, { "epoch": 0.7038157824055007, "grad_norm": 1.6097532510757446, "learning_rate": 4.258562592681945e-05, "loss": 1.4623, "step": 19653 }, { "epoch": 0.7038515945350691, "grad_norm": 1.509201169013977, "learning_rate": 4.2576129568783654e-05, "loss": 1.6167, "step": 19654 }, { "epoch": 0.7038874066646373, "grad_norm": 2.127002716064453, "learning_rate": 4.2566633983311885e-05, "loss": 1.636, "step": 19655 }, { "epoch": 0.7039232187942056, "grad_norm": 2.234020948410034, "learning_rate": 4.2557139170532045e-05, "loss": 1.4049, "step": 19656 }, { "epoch": 0.7039590309237739, "grad_norm": 1.5483022928237915, "learning_rate": 4.2547645130571764e-05, "loss": 1.6323, "step": 19657 }, { "epoch": 0.7039948430533421, "grad_norm": 1.733654499053955, "learning_rate": 4.253815186355881e-05, "loss": 1.5718, "step": 19658 }, { "epoch": 0.7040306551829104, "grad_norm": 1.627803921699524, "learning_rate": 4.2528659369620905e-05, "loss": 1.5466, "step": 19659 }, { "epoch": 0.7040664673124787, "grad_norm": 1.8518602848052979, "learning_rate": 4.2519167648885785e-05, "loss": 1.5608, "step": 19660 }, { "epoch": 0.7041022794420471, "grad_norm": 1.3604775667190552, "learning_rate": 4.250967670148113e-05, "loss": 1.2684, "step": 19661 }, { "epoch": 0.7041380915716153, "grad_norm": 1.494215488433838, "learning_rate": 4.250018652753454e-05, "loss": 1.5892, "step": 19662 }, { "epoch": 0.7041739037011836, "grad_norm": 1.7942612171173096, "learning_rate": 4.2490697127173826e-05, "loss": 1.3718, "step": 19663 }, { "epoch": 0.7042097158307519, "grad_norm": 1.7529025077819824, "learning_rate": 4.248120850052658e-05, "loss": 1.3392, "step": 19664 }, { "epoch": 0.7042455279603201, "grad_norm": 1.6615201234817505, "learning_rate": 4.247172064772053e-05, "loss": 1.4284, "step": 19665 }, { "epoch": 0.7042813400898884, "grad_norm": 2.163020133972168, "learning_rate": 4.246223356888318e-05, "loss": 1.4905, "step": 19666 }, { "epoch": 0.7043171522194567, "grad_norm": 1.709355115890503, "learning_rate": 4.2452747264142335e-05, "loss": 1.2316, "step": 19667 }, { "epoch": 0.7043529643490251, "grad_norm": 1.8853095769882202, "learning_rate": 4.244326173362555e-05, "loss": 1.541, "step": 19668 }, { "epoch": 0.7043887764785933, "grad_norm": 2.275879383087158, "learning_rate": 4.2433776977460396e-05, "loss": 1.6956, "step": 19669 }, { "epoch": 0.7044245886081616, "grad_norm": 1.2705893516540527, "learning_rate": 4.242429299577452e-05, "loss": 1.2138, "step": 19670 }, { "epoch": 0.7044604007377299, "grad_norm": 2.3329508304595947, "learning_rate": 4.241480978869551e-05, "loss": 1.461, "step": 19671 }, { "epoch": 0.7044962128672981, "grad_norm": 1.6873449087142944, "learning_rate": 4.2405327356351e-05, "loss": 1.468, "step": 19672 }, { "epoch": 0.7045320249968664, "grad_norm": 1.3374345302581787, "learning_rate": 4.239584569886843e-05, "loss": 1.4267, "step": 19673 }, { "epoch": 0.7045678371264347, "grad_norm": 1.6494673490524292, "learning_rate": 4.2386364816375545e-05, "loss": 1.5675, "step": 19674 }, { "epoch": 0.704603649256003, "grad_norm": 2.307361364364624, "learning_rate": 4.2376884708999754e-05, "loss": 1.659, "step": 19675 }, { "epoch": 0.7046394613855713, "grad_norm": 1.6656765937805176, "learning_rate": 4.23674053768687e-05, "loss": 1.4741, "step": 19676 }, { "epoch": 0.7046752735151396, "grad_norm": 2.520934820175171, "learning_rate": 4.2357926820109816e-05, "loss": 1.3717, "step": 19677 }, { "epoch": 0.7047110856447079, "grad_norm": 1.7585898637771606, "learning_rate": 4.234844903885068e-05, "loss": 1.3923, "step": 19678 }, { "epoch": 0.7047468977742761, "grad_norm": 1.6115695238113403, "learning_rate": 4.233897203321883e-05, "loss": 1.4965, "step": 19679 }, { "epoch": 0.7047827099038444, "grad_norm": 1.3951077461242676, "learning_rate": 4.23294958033417e-05, "loss": 1.6331, "step": 19680 }, { "epoch": 0.7048185220334127, "grad_norm": 1.9594569206237793, "learning_rate": 4.232002034934681e-05, "loss": 1.4238, "step": 19681 }, { "epoch": 0.704854334162981, "grad_norm": 1.7811216115951538, "learning_rate": 4.231054567136166e-05, "loss": 1.4687, "step": 19682 }, { "epoch": 0.7048901462925493, "grad_norm": 1.6424189805984497, "learning_rate": 4.230107176951372e-05, "loss": 1.3477, "step": 19683 }, { "epoch": 0.7049259584221176, "grad_norm": 1.5223708152770996, "learning_rate": 4.229159864393037e-05, "loss": 1.6541, "step": 19684 }, { "epoch": 0.7049617705516859, "grad_norm": 1.9250037670135498, "learning_rate": 4.2282126294739186e-05, "loss": 1.7329, "step": 19685 }, { "epoch": 0.7049975826812541, "grad_norm": 1.6363168954849243, "learning_rate": 4.227265472206756e-05, "loss": 1.5585, "step": 19686 }, { "epoch": 0.7050333948108224, "grad_norm": 1.635489821434021, "learning_rate": 4.226318392604285e-05, "loss": 1.2106, "step": 19687 }, { "epoch": 0.7050692069403907, "grad_norm": 1.4792739152908325, "learning_rate": 4.225371390679254e-05, "loss": 1.4307, "step": 19688 }, { "epoch": 0.705105019069959, "grad_norm": 1.7022123336791992, "learning_rate": 4.224424466444401e-05, "loss": 1.6674, "step": 19689 }, { "epoch": 0.7051408311995273, "grad_norm": 1.5994065999984741, "learning_rate": 4.2234776199124705e-05, "loss": 1.3563, "step": 19690 }, { "epoch": 0.7051766433290956, "grad_norm": 1.511945128440857, "learning_rate": 4.222530851096194e-05, "loss": 1.5933, "step": 19691 }, { "epoch": 0.7052124554586638, "grad_norm": 1.8896557092666626, "learning_rate": 4.221584160008313e-05, "loss": 1.4839, "step": 19692 }, { "epoch": 0.7052482675882321, "grad_norm": 1.4836983680725098, "learning_rate": 4.220637546661562e-05, "loss": 1.4684, "step": 19693 }, { "epoch": 0.7052840797178004, "grad_norm": 1.3161821365356445, "learning_rate": 4.2196910110686826e-05, "loss": 1.5167, "step": 19694 }, { "epoch": 0.7053198918473687, "grad_norm": 1.367013692855835, "learning_rate": 4.218744553242402e-05, "loss": 1.6204, "step": 19695 }, { "epoch": 0.705355703976937, "grad_norm": 1.7846018075942993, "learning_rate": 4.217798173195454e-05, "loss": 1.3237, "step": 19696 }, { "epoch": 0.7053915161065053, "grad_norm": 1.9348431825637817, "learning_rate": 4.216851870940578e-05, "loss": 1.5916, "step": 19697 }, { "epoch": 0.7054273282360736, "grad_norm": 1.376637578010559, "learning_rate": 4.2159056464904964e-05, "loss": 1.316, "step": 19698 }, { "epoch": 0.7054631403656418, "grad_norm": 2.1042535305023193, "learning_rate": 4.2149594998579445e-05, "loss": 1.1512, "step": 19699 }, { "epoch": 0.7054989524952101, "grad_norm": 1.6151759624481201, "learning_rate": 4.214013431055649e-05, "loss": 1.2476, "step": 19700 }, { "epoch": 0.7055347646247784, "grad_norm": 1.6883217096328735, "learning_rate": 4.213067440096343e-05, "loss": 1.5371, "step": 19701 }, { "epoch": 0.7055705767543466, "grad_norm": 1.4629133939743042, "learning_rate": 4.212121526992747e-05, "loss": 1.6678, "step": 19702 }, { "epoch": 0.705606388883915, "grad_norm": 1.3969576358795166, "learning_rate": 4.211175691757591e-05, "loss": 1.4144, "step": 19703 }, { "epoch": 0.7056422010134833, "grad_norm": 1.6211917400360107, "learning_rate": 4.2102299344036014e-05, "loss": 1.4869, "step": 19704 }, { "epoch": 0.7056780131430516, "grad_norm": 1.6717114448547363, "learning_rate": 4.2092842549434954e-05, "loss": 1.259, "step": 19705 }, { "epoch": 0.7057138252726198, "grad_norm": 1.6694384813308716, "learning_rate": 4.208338653390002e-05, "loss": 1.5622, "step": 19706 }, { "epoch": 0.7057496374021881, "grad_norm": 1.3916771411895752, "learning_rate": 4.20739312975584e-05, "loss": 1.5155, "step": 19707 }, { "epoch": 0.7057854495317564, "grad_norm": 1.4139550924301147, "learning_rate": 4.206447684053735e-05, "loss": 1.3517, "step": 19708 }, { "epoch": 0.7058212616613246, "grad_norm": 2.4061508178710938, "learning_rate": 4.2055023162964e-05, "loss": 1.742, "step": 19709 }, { "epoch": 0.705857073790893, "grad_norm": 1.6226320266723633, "learning_rate": 4.2045570264965574e-05, "loss": 1.6635, "step": 19710 }, { "epoch": 0.7058928859204613, "grad_norm": 1.434448003768921, "learning_rate": 4.203611814666925e-05, "loss": 1.2733, "step": 19711 }, { "epoch": 0.7059286980500296, "grad_norm": 1.5434811115264893, "learning_rate": 4.202666680820221e-05, "loss": 1.2843, "step": 19712 }, { "epoch": 0.7059645101795978, "grad_norm": 1.7157692909240723, "learning_rate": 4.201721624969156e-05, "loss": 1.4567, "step": 19713 }, { "epoch": 0.7060003223091661, "grad_norm": 1.388255000114441, "learning_rate": 4.200776647126447e-05, "loss": 1.4829, "step": 19714 }, { "epoch": 0.7060361344387344, "grad_norm": 1.2208225727081299, "learning_rate": 4.199831747304811e-05, "loss": 1.4678, "step": 19715 }, { "epoch": 0.7060719465683026, "grad_norm": 1.6040128469467163, "learning_rate": 4.198886925516954e-05, "loss": 1.5309, "step": 19716 }, { "epoch": 0.706107758697871, "grad_norm": 1.2374918460845947, "learning_rate": 4.19794218177559e-05, "loss": 1.4965, "step": 19717 }, { "epoch": 0.7061435708274393, "grad_norm": 1.8679009675979614, "learning_rate": 4.196997516093431e-05, "loss": 1.6104, "step": 19718 }, { "epoch": 0.7061793829570076, "grad_norm": 1.7728638648986816, "learning_rate": 4.196052928483188e-05, "loss": 1.5767, "step": 19719 }, { "epoch": 0.7062151950865758, "grad_norm": 1.6892356872558594, "learning_rate": 4.195108418957563e-05, "loss": 1.5712, "step": 19720 }, { "epoch": 0.7062510072161441, "grad_norm": 1.1942442655563354, "learning_rate": 4.194163987529266e-05, "loss": 1.105, "step": 19721 }, { "epoch": 0.7062868193457124, "grad_norm": 2.3192408084869385, "learning_rate": 4.1932196342110076e-05, "loss": 1.4804, "step": 19722 }, { "epoch": 0.7063226314752806, "grad_norm": 1.7069861888885498, "learning_rate": 4.1922753590154854e-05, "loss": 1.5507, "step": 19723 }, { "epoch": 0.706358443604849, "grad_norm": 2.246624708175659, "learning_rate": 4.1913311619554064e-05, "loss": 1.5065, "step": 19724 }, { "epoch": 0.7063942557344173, "grad_norm": 1.9762353897094727, "learning_rate": 4.1903870430434736e-05, "loss": 1.7526, "step": 19725 }, { "epoch": 0.7064300678639855, "grad_norm": 1.4072891473770142, "learning_rate": 4.189443002292392e-05, "loss": 1.5152, "step": 19726 }, { "epoch": 0.7064658799935538, "grad_norm": 2.209500312805176, "learning_rate": 4.1884990397148584e-05, "loss": 1.2394, "step": 19727 }, { "epoch": 0.7065016921231221, "grad_norm": 2.366694450378418, "learning_rate": 4.187555155323572e-05, "loss": 1.7438, "step": 19728 }, { "epoch": 0.7065375042526904, "grad_norm": 1.3919899463653564, "learning_rate": 4.186611349131235e-05, "loss": 1.5614, "step": 19729 }, { "epoch": 0.7065733163822586, "grad_norm": 1.4385210275650024, "learning_rate": 4.1856676211505465e-05, "loss": 1.6848, "step": 19730 }, { "epoch": 0.706609128511827, "grad_norm": 2.9990813732147217, "learning_rate": 4.184723971394197e-05, "loss": 1.3986, "step": 19731 }, { "epoch": 0.7066449406413953, "grad_norm": 1.7250571250915527, "learning_rate": 4.183780399874885e-05, "loss": 1.7292, "step": 19732 }, { "epoch": 0.7066807527709635, "grad_norm": 1.7180428504943848, "learning_rate": 4.182836906605309e-05, "loss": 1.2938, "step": 19733 }, { "epoch": 0.7067165649005318, "grad_norm": 1.549298882484436, "learning_rate": 4.1818934915981544e-05, "loss": 1.4342, "step": 19734 }, { "epoch": 0.7067523770301001, "grad_norm": 1.4171665906906128, "learning_rate": 4.180950154866119e-05, "loss": 1.6761, "step": 19735 }, { "epoch": 0.7067881891596683, "grad_norm": 1.4383457899093628, "learning_rate": 4.180006896421893e-05, "loss": 1.3711, "step": 19736 }, { "epoch": 0.7068240012892366, "grad_norm": 1.7060292959213257, "learning_rate": 4.179063716278171e-05, "loss": 1.7098, "step": 19737 }, { "epoch": 0.706859813418805, "grad_norm": 1.4669756889343262, "learning_rate": 4.178120614447634e-05, "loss": 1.5862, "step": 19738 }, { "epoch": 0.7068956255483733, "grad_norm": 1.5930043458938599, "learning_rate": 4.177177590942974e-05, "loss": 1.7459, "step": 19739 }, { "epoch": 0.7069314376779415, "grad_norm": 1.4487543106079102, "learning_rate": 4.176234645776883e-05, "loss": 1.6866, "step": 19740 }, { "epoch": 0.7069672498075098, "grad_norm": 1.9951848983764648, "learning_rate": 4.1752917789620395e-05, "loss": 1.4115, "step": 19741 }, { "epoch": 0.7070030619370781, "grad_norm": 1.5408639907836914, "learning_rate": 4.174348990511131e-05, "loss": 1.3569, "step": 19742 }, { "epoch": 0.7070388740666463, "grad_norm": 1.9540464878082275, "learning_rate": 4.1734062804368426e-05, "loss": 1.1295, "step": 19743 }, { "epoch": 0.7070746861962146, "grad_norm": 1.8576759099960327, "learning_rate": 4.17246364875186e-05, "loss": 1.4529, "step": 19744 }, { "epoch": 0.707110498325783, "grad_norm": 1.4856642484664917, "learning_rate": 4.171521095468859e-05, "loss": 1.6362, "step": 19745 }, { "epoch": 0.7071463104553513, "grad_norm": 1.6320254802703857, "learning_rate": 4.1705786206005235e-05, "loss": 1.6359, "step": 19746 }, { "epoch": 0.7071821225849195, "grad_norm": 1.697995901107788, "learning_rate": 4.169636224159533e-05, "loss": 1.277, "step": 19747 }, { "epoch": 0.7072179347144878, "grad_norm": 1.8926945924758911, "learning_rate": 4.1686939061585694e-05, "loss": 1.8489, "step": 19748 }, { "epoch": 0.7072537468440561, "grad_norm": 1.6354869604110718, "learning_rate": 4.167751666610309e-05, "loss": 1.7138, "step": 19749 }, { "epoch": 0.7072895589736243, "grad_norm": 1.598158597946167, "learning_rate": 4.166809505527418e-05, "loss": 1.3413, "step": 19750 }, { "epoch": 0.7073253711031926, "grad_norm": 1.6339963674545288, "learning_rate": 4.165867422922589e-05, "loss": 1.4861, "step": 19751 }, { "epoch": 0.707361183232761, "grad_norm": 1.544044852256775, "learning_rate": 4.1649254188084854e-05, "loss": 1.2516, "step": 19752 }, { "epoch": 0.7073969953623293, "grad_norm": 1.874853491783142, "learning_rate": 4.1639834931977864e-05, "loss": 1.657, "step": 19753 }, { "epoch": 0.7074328074918975, "grad_norm": 1.7855067253112793, "learning_rate": 4.163041646103154e-05, "loss": 1.4241, "step": 19754 }, { "epoch": 0.7074686196214658, "grad_norm": 2.4784038066864014, "learning_rate": 4.162099877537274e-05, "loss": 1.1453, "step": 19755 }, { "epoch": 0.7075044317510341, "grad_norm": 1.4555649757385254, "learning_rate": 4.161158187512808e-05, "loss": 1.6522, "step": 19756 }, { "epoch": 0.7075402438806023, "grad_norm": 2.270052909851074, "learning_rate": 4.160216576042426e-05, "loss": 1.7847, "step": 19757 }, { "epoch": 0.7075760560101706, "grad_norm": 1.4863038063049316, "learning_rate": 4.159275043138801e-05, "loss": 1.3766, "step": 19758 }, { "epoch": 0.707611868139739, "grad_norm": 1.55003023147583, "learning_rate": 4.1583335888145915e-05, "loss": 1.733, "step": 19759 }, { "epoch": 0.7076476802693072, "grad_norm": 1.7473454475402832, "learning_rate": 4.1573922130824725e-05, "loss": 1.4507, "step": 19760 }, { "epoch": 0.7076834923988755, "grad_norm": 1.6226022243499756, "learning_rate": 4.156450915955099e-05, "loss": 1.676, "step": 19761 }, { "epoch": 0.7077193045284438, "grad_norm": 2.1244826316833496, "learning_rate": 4.155509697445147e-05, "loss": 1.7523, "step": 19762 }, { "epoch": 0.707755116658012, "grad_norm": 1.449433445930481, "learning_rate": 4.1545685575652695e-05, "loss": 1.6795, "step": 19763 }, { "epoch": 0.7077909287875803, "grad_norm": 1.377442717552185, "learning_rate": 4.1536274963281355e-05, "loss": 1.5041, "step": 19764 }, { "epoch": 0.7078267409171486, "grad_norm": 1.4417132139205933, "learning_rate": 4.152686513746399e-05, "loss": 1.5012, "step": 19765 }, { "epoch": 0.707862553046717, "grad_norm": 1.397241473197937, "learning_rate": 4.151745609832722e-05, "loss": 1.2836, "step": 19766 }, { "epoch": 0.7078983651762852, "grad_norm": 1.9713245630264282, "learning_rate": 4.150804784599769e-05, "loss": 1.3324, "step": 19767 }, { "epoch": 0.7079341773058535, "grad_norm": 1.558300256729126, "learning_rate": 4.149864038060185e-05, "loss": 1.6347, "step": 19768 }, { "epoch": 0.7079699894354218, "grad_norm": 1.6256293058395386, "learning_rate": 4.148923370226642e-05, "loss": 1.4837, "step": 19769 }, { "epoch": 0.70800580156499, "grad_norm": 1.6766290664672852, "learning_rate": 4.147982781111783e-05, "loss": 1.3036, "step": 19770 }, { "epoch": 0.7080416136945583, "grad_norm": 1.964269757270813, "learning_rate": 4.147042270728272e-05, "loss": 1.138, "step": 19771 }, { "epoch": 0.7080774258241266, "grad_norm": 1.2855141162872314, "learning_rate": 4.146101839088749e-05, "loss": 1.3127, "step": 19772 }, { "epoch": 0.708113237953695, "grad_norm": 1.4238767623901367, "learning_rate": 4.145161486205883e-05, "loss": 1.6241, "step": 19773 }, { "epoch": 0.7081490500832632, "grad_norm": 1.4786126613616943, "learning_rate": 4.144221212092316e-05, "loss": 1.4925, "step": 19774 }, { "epoch": 0.7081848622128315, "grad_norm": 1.7244206666946411, "learning_rate": 4.1432810167606964e-05, "loss": 1.5353, "step": 19775 }, { "epoch": 0.7082206743423998, "grad_norm": 1.447631597518921, "learning_rate": 4.1423409002236755e-05, "loss": 1.3844, "step": 19776 }, { "epoch": 0.708256486471968, "grad_norm": 1.4471874237060547, "learning_rate": 4.141400862493903e-05, "loss": 1.3439, "step": 19777 }, { "epoch": 0.7082922986015363, "grad_norm": 1.7642822265625, "learning_rate": 4.140460903584027e-05, "loss": 1.7151, "step": 19778 }, { "epoch": 0.7083281107311046, "grad_norm": 1.674778699874878, "learning_rate": 4.139521023506688e-05, "loss": 1.6022, "step": 19779 }, { "epoch": 0.708363922860673, "grad_norm": 1.7055083513259888, "learning_rate": 4.1385812222745344e-05, "loss": 1.5398, "step": 19780 }, { "epoch": 0.7083997349902412, "grad_norm": 2.1311376094818115, "learning_rate": 4.13764149990021e-05, "loss": 1.2179, "step": 19781 }, { "epoch": 0.7084355471198095, "grad_norm": 2.307271957397461, "learning_rate": 4.136701856396361e-05, "loss": 1.3747, "step": 19782 }, { "epoch": 0.7084713592493778, "grad_norm": 1.5620884895324707, "learning_rate": 4.135762291775622e-05, "loss": 1.3208, "step": 19783 }, { "epoch": 0.708507171378946, "grad_norm": 1.4920231103897095, "learning_rate": 4.1348228060506364e-05, "loss": 1.3738, "step": 19784 }, { "epoch": 0.7085429835085143, "grad_norm": 1.612851858139038, "learning_rate": 4.133883399234049e-05, "loss": 1.3598, "step": 19785 }, { "epoch": 0.7085787956380826, "grad_norm": 1.8075392246246338, "learning_rate": 4.132944071338489e-05, "loss": 1.9048, "step": 19786 }, { "epoch": 0.708614607767651, "grad_norm": 1.3424999713897705, "learning_rate": 4.132004822376598e-05, "loss": 1.2624, "step": 19787 }, { "epoch": 0.7086504198972192, "grad_norm": 1.75864577293396, "learning_rate": 4.1310656523610144e-05, "loss": 1.2026, "step": 19788 }, { "epoch": 0.7086862320267875, "grad_norm": 1.7845993041992188, "learning_rate": 4.130126561304376e-05, "loss": 1.5058, "step": 19789 }, { "epoch": 0.7087220441563558, "grad_norm": 2.524739980697632, "learning_rate": 4.129187549219308e-05, "loss": 1.5992, "step": 19790 }, { "epoch": 0.708757856285924, "grad_norm": 1.766992211341858, "learning_rate": 4.12824861611845e-05, "loss": 1.4969, "step": 19791 }, { "epoch": 0.7087936684154923, "grad_norm": 1.8509896993637085, "learning_rate": 4.127309762014435e-05, "loss": 1.6552, "step": 19792 }, { "epoch": 0.7088294805450606, "grad_norm": 1.721527338027954, "learning_rate": 4.12637098691989e-05, "loss": 1.4734, "step": 19793 }, { "epoch": 0.7088652926746289, "grad_norm": 2.155327558517456, "learning_rate": 4.125432290847446e-05, "loss": 1.1883, "step": 19794 }, { "epoch": 0.7089011048041972, "grad_norm": 1.6428849697113037, "learning_rate": 4.124493673809733e-05, "loss": 1.1671, "step": 19795 }, { "epoch": 0.7089369169337655, "grad_norm": 3.388340711593628, "learning_rate": 4.123555135819382e-05, "loss": 1.6063, "step": 19796 }, { "epoch": 0.7089727290633338, "grad_norm": 1.632297396659851, "learning_rate": 4.122616676889014e-05, "loss": 1.5945, "step": 19797 }, { "epoch": 0.709008541192902, "grad_norm": 1.713770866394043, "learning_rate": 4.121678297031256e-05, "loss": 1.6018, "step": 19798 }, { "epoch": 0.7090443533224703, "grad_norm": 2.2215230464935303, "learning_rate": 4.1207399962587356e-05, "loss": 1.5367, "step": 19799 }, { "epoch": 0.7090801654520386, "grad_norm": 1.535365343093872, "learning_rate": 4.119801774584077e-05, "loss": 1.2133, "step": 19800 }, { "epoch": 0.7091159775816069, "grad_norm": 1.8413805961608887, "learning_rate": 4.118863632019898e-05, "loss": 1.5743, "step": 19801 }, { "epoch": 0.7091517897111752, "grad_norm": 2.152158260345459, "learning_rate": 4.117925568578822e-05, "loss": 1.3583, "step": 19802 }, { "epoch": 0.7091876018407435, "grad_norm": 1.7258599996566772, "learning_rate": 4.116987584273474e-05, "loss": 1.077, "step": 19803 }, { "epoch": 0.7092234139703117, "grad_norm": 1.4642314910888672, "learning_rate": 4.116049679116466e-05, "loss": 1.1684, "step": 19804 }, { "epoch": 0.70925922609988, "grad_norm": 1.8263181447982788, "learning_rate": 4.11511185312042e-05, "loss": 1.695, "step": 19805 }, { "epoch": 0.7092950382294483, "grad_norm": 1.8259236812591553, "learning_rate": 4.114174106297952e-05, "loss": 1.166, "step": 19806 }, { "epoch": 0.7093308503590166, "grad_norm": 1.4869673252105713, "learning_rate": 4.113236438661684e-05, "loss": 1.2245, "step": 19807 }, { "epoch": 0.7093666624885849, "grad_norm": 1.906758427619934, "learning_rate": 4.112298850224223e-05, "loss": 1.4127, "step": 19808 }, { "epoch": 0.7094024746181532, "grad_norm": 1.2898838520050049, "learning_rate": 4.111361340998186e-05, "loss": 1.4856, "step": 19809 }, { "epoch": 0.7094382867477215, "grad_norm": 1.67406165599823, "learning_rate": 4.11042391099619e-05, "loss": 1.3279, "step": 19810 }, { "epoch": 0.7094740988772897, "grad_norm": 1.590075969696045, "learning_rate": 4.109486560230839e-05, "loss": 1.5435, "step": 19811 }, { "epoch": 0.709509911006858, "grad_norm": 1.462267518043518, "learning_rate": 4.108549288714748e-05, "loss": 1.507, "step": 19812 }, { "epoch": 0.7095457231364263, "grad_norm": 1.8588240146636963, "learning_rate": 4.107612096460528e-05, "loss": 1.4161, "step": 19813 }, { "epoch": 0.7095815352659945, "grad_norm": 1.5106000900268555, "learning_rate": 4.1066749834807895e-05, "loss": 1.3721, "step": 19814 }, { "epoch": 0.7096173473955629, "grad_norm": 1.4201984405517578, "learning_rate": 4.105737949788133e-05, "loss": 1.5243, "step": 19815 }, { "epoch": 0.7096531595251312, "grad_norm": 1.4662309885025024, "learning_rate": 4.10480099539517e-05, "loss": 1.6587, "step": 19816 }, { "epoch": 0.7096889716546995, "grad_norm": 1.658463954925537, "learning_rate": 4.103864120314506e-05, "loss": 1.4674, "step": 19817 }, { "epoch": 0.7097247837842677, "grad_norm": 1.8113993406295776, "learning_rate": 4.1029273245587476e-05, "loss": 1.505, "step": 19818 }, { "epoch": 0.709760595913836, "grad_norm": 2.001314878463745, "learning_rate": 4.101990608140492e-05, "loss": 1.4655, "step": 19819 }, { "epoch": 0.7097964080434043, "grad_norm": 1.4082540273666382, "learning_rate": 4.101053971072345e-05, "loss": 1.414, "step": 19820 }, { "epoch": 0.7098322201729725, "grad_norm": 1.8123232126235962, "learning_rate": 4.1001174133669116e-05, "loss": 1.5416, "step": 19821 }, { "epoch": 0.7098680323025409, "grad_norm": 1.7902493476867676, "learning_rate": 4.099180935036784e-05, "loss": 1.4017, "step": 19822 }, { "epoch": 0.7099038444321092, "grad_norm": 1.911616563796997, "learning_rate": 4.0982445360945654e-05, "loss": 1.6244, "step": 19823 }, { "epoch": 0.7099396565616775, "grad_norm": 1.7272944450378418, "learning_rate": 4.097308216552854e-05, "loss": 1.5453, "step": 19824 }, { "epoch": 0.7099754686912457, "grad_norm": 1.4212089776992798, "learning_rate": 4.0963719764242504e-05, "loss": 1.5528, "step": 19825 }, { "epoch": 0.710011280820814, "grad_norm": 2.0135669708251953, "learning_rate": 4.0954358157213436e-05, "loss": 1.2744, "step": 19826 }, { "epoch": 0.7100470929503823, "grad_norm": 1.7139232158660889, "learning_rate": 4.0944997344567304e-05, "loss": 1.3807, "step": 19827 }, { "epoch": 0.7100829050799505, "grad_norm": 1.644747018814087, "learning_rate": 4.0935637326430095e-05, "loss": 1.3795, "step": 19828 }, { "epoch": 0.7101187172095189, "grad_norm": 1.47983717918396, "learning_rate": 4.092627810292767e-05, "loss": 1.5415, "step": 19829 }, { "epoch": 0.7101545293390872, "grad_norm": 1.429914951324463, "learning_rate": 4.0916919674185974e-05, "loss": 1.4195, "step": 19830 }, { "epoch": 0.7101903414686555, "grad_norm": 1.6520344018936157, "learning_rate": 4.09075620403309e-05, "loss": 1.4766, "step": 19831 }, { "epoch": 0.7102261535982237, "grad_norm": 1.82496976852417, "learning_rate": 4.0898205201488404e-05, "loss": 1.6098, "step": 19832 }, { "epoch": 0.710261965727792, "grad_norm": 1.775235891342163, "learning_rate": 4.088884915778427e-05, "loss": 1.431, "step": 19833 }, { "epoch": 0.7102977778573603, "grad_norm": 1.3506832122802734, "learning_rate": 4.087949390934443e-05, "loss": 1.537, "step": 19834 }, { "epoch": 0.7103335899869285, "grad_norm": 1.5820268392562866, "learning_rate": 4.0870139456294745e-05, "loss": 1.2898, "step": 19835 }, { "epoch": 0.7103694021164969, "grad_norm": 1.426241397857666, "learning_rate": 4.0860785798761094e-05, "loss": 1.3136, "step": 19836 }, { "epoch": 0.7104052142460652, "grad_norm": 1.4646059274673462, "learning_rate": 4.0851432936869296e-05, "loss": 1.1997, "step": 19837 }, { "epoch": 0.7104410263756334, "grad_norm": 1.9413141012191772, "learning_rate": 4.0842080870745084e-05, "loss": 1.5866, "step": 19838 }, { "epoch": 0.7104768385052017, "grad_norm": 1.6418945789337158, "learning_rate": 4.083272960051444e-05, "loss": 1.4602, "step": 19839 }, { "epoch": 0.71051265063477, "grad_norm": 1.7183191776275635, "learning_rate": 4.0823379126303064e-05, "loss": 1.2933, "step": 19840 }, { "epoch": 0.7105484627643383, "grad_norm": 1.4567809104919434, "learning_rate": 4.0814029448236803e-05, "loss": 1.3024, "step": 19841 }, { "epoch": 0.7105842748939065, "grad_norm": 3.3251304626464844, "learning_rate": 4.080468056644141e-05, "loss": 1.2703, "step": 19842 }, { "epoch": 0.7106200870234749, "grad_norm": 2.0006585121154785, "learning_rate": 4.0795332481042736e-05, "loss": 1.1981, "step": 19843 }, { "epoch": 0.7106558991530432, "grad_norm": 1.7054616212844849, "learning_rate": 4.078598519216645e-05, "loss": 1.5251, "step": 19844 }, { "epoch": 0.7106917112826114, "grad_norm": 2.462589979171753, "learning_rate": 4.077663869993835e-05, "loss": 1.3953, "step": 19845 }, { "epoch": 0.7107275234121797, "grad_norm": 1.619739055633545, "learning_rate": 4.076729300448423e-05, "loss": 1.3919, "step": 19846 }, { "epoch": 0.710763335541748, "grad_norm": 1.6942226886749268, "learning_rate": 4.075794810592973e-05, "loss": 1.2181, "step": 19847 }, { "epoch": 0.7107991476713162, "grad_norm": 1.8538376092910767, "learning_rate": 4.074860400440067e-05, "loss": 1.4492, "step": 19848 }, { "epoch": 0.7108349598008845, "grad_norm": 1.5637342929840088, "learning_rate": 4.073926070002264e-05, "loss": 1.4442, "step": 19849 }, { "epoch": 0.7108707719304529, "grad_norm": 1.4168877601623535, "learning_rate": 4.072991819292148e-05, "loss": 1.9249, "step": 19850 }, { "epoch": 0.7109065840600212, "grad_norm": 1.5848156213760376, "learning_rate": 4.0720576483222795e-05, "loss": 1.3855, "step": 19851 }, { "epoch": 0.7109423961895894, "grad_norm": 1.6019319295883179, "learning_rate": 4.0711235571052306e-05, "loss": 1.6355, "step": 19852 }, { "epoch": 0.7109782083191577, "grad_norm": 1.3284941911697388, "learning_rate": 4.070189545653561e-05, "loss": 1.214, "step": 19853 }, { "epoch": 0.711014020448726, "grad_norm": 1.7176045179367065, "learning_rate": 4.069255613979849e-05, "loss": 1.523, "step": 19854 }, { "epoch": 0.7110498325782942, "grad_norm": 1.7608132362365723, "learning_rate": 4.068321762096652e-05, "loss": 1.1746, "step": 19855 }, { "epoch": 0.7110856447078625, "grad_norm": 1.74787437915802, "learning_rate": 4.067387990016528e-05, "loss": 1.2927, "step": 19856 }, { "epoch": 0.7111214568374309, "grad_norm": 1.6266214847564697, "learning_rate": 4.0664542977520526e-05, "loss": 1.3775, "step": 19857 }, { "epoch": 0.7111572689669992, "grad_norm": 1.6558783054351807, "learning_rate": 4.065520685315777e-05, "loss": 1.3691, "step": 19858 }, { "epoch": 0.7111930810965674, "grad_norm": 1.853967308998108, "learning_rate": 4.0645871527202695e-05, "loss": 1.3375, "step": 19859 }, { "epoch": 0.7112288932261357, "grad_norm": 1.459820032119751, "learning_rate": 4.063653699978079e-05, "loss": 1.3781, "step": 19860 }, { "epoch": 0.711264705355704, "grad_norm": 1.3726294040679932, "learning_rate": 4.062720327101778e-05, "loss": 1.5798, "step": 19861 }, { "epoch": 0.7113005174852722, "grad_norm": 1.7460395097732544, "learning_rate": 4.0617870341039155e-05, "loss": 1.499, "step": 19862 }, { "epoch": 0.7113363296148405, "grad_norm": 1.8195322751998901, "learning_rate": 4.060853820997046e-05, "loss": 1.3855, "step": 19863 }, { "epoch": 0.7113721417444089, "grad_norm": 1.6731899976730347, "learning_rate": 4.059920687793727e-05, "loss": 1.6307, "step": 19864 }, { "epoch": 0.7114079538739772, "grad_norm": 1.573760986328125, "learning_rate": 4.058987634506514e-05, "loss": 1.4133, "step": 19865 }, { "epoch": 0.7114437660035454, "grad_norm": 1.6020008325576782, "learning_rate": 4.058054661147961e-05, "loss": 1.6497, "step": 19866 }, { "epoch": 0.7114795781331137, "grad_norm": 1.3755627870559692, "learning_rate": 4.057121767730612e-05, "loss": 1.3899, "step": 19867 }, { "epoch": 0.711515390262682, "grad_norm": 1.3662739992141724, "learning_rate": 4.05618895426703e-05, "loss": 1.5769, "step": 19868 }, { "epoch": 0.7115512023922502, "grad_norm": 1.8042664527893066, "learning_rate": 4.055256220769755e-05, "loss": 1.3367, "step": 19869 }, { "epoch": 0.7115870145218185, "grad_norm": 1.653998851776123, "learning_rate": 4.0543235672513434e-05, "loss": 1.4023, "step": 19870 }, { "epoch": 0.7116228266513869, "grad_norm": 2.058856964111328, "learning_rate": 4.0533909937243365e-05, "loss": 1.3852, "step": 19871 }, { "epoch": 0.7116586387809551, "grad_norm": 1.6064378023147583, "learning_rate": 4.0524585002012815e-05, "loss": 1.3659, "step": 19872 }, { "epoch": 0.7116944509105234, "grad_norm": 1.5606815814971924, "learning_rate": 4.05152608669473e-05, "loss": 1.3211, "step": 19873 }, { "epoch": 0.7117302630400917, "grad_norm": 1.5185139179229736, "learning_rate": 4.0505937532172175e-05, "loss": 1.4429, "step": 19874 }, { "epoch": 0.71176607516966, "grad_norm": 1.587683081626892, "learning_rate": 4.049661499781293e-05, "loss": 1.6571, "step": 19875 }, { "epoch": 0.7118018872992282, "grad_norm": 1.8505325317382812, "learning_rate": 4.048729326399498e-05, "loss": 1.396, "step": 19876 }, { "epoch": 0.7118376994287965, "grad_norm": 1.4003081321716309, "learning_rate": 4.047797233084375e-05, "loss": 1.402, "step": 19877 }, { "epoch": 0.7118735115583649, "grad_norm": 1.828133225440979, "learning_rate": 4.0468652198484603e-05, "loss": 1.035, "step": 19878 }, { "epoch": 0.7119093236879331, "grad_norm": 1.4773956537246704, "learning_rate": 4.045933286704296e-05, "loss": 1.5263, "step": 19879 }, { "epoch": 0.7119451358175014, "grad_norm": 1.9004855155944824, "learning_rate": 4.0450014336644204e-05, "loss": 1.337, "step": 19880 }, { "epoch": 0.7119809479470697, "grad_norm": 2.385497808456421, "learning_rate": 4.0440696607413665e-05, "loss": 1.6802, "step": 19881 }, { "epoch": 0.712016760076638, "grad_norm": 1.733431339263916, "learning_rate": 4.0431379679476735e-05, "loss": 1.6468, "step": 19882 }, { "epoch": 0.7120525722062062, "grad_norm": 1.4043059349060059, "learning_rate": 4.042206355295875e-05, "loss": 1.4577, "step": 19883 }, { "epoch": 0.7120883843357745, "grad_norm": 1.3792320489883423, "learning_rate": 4.0412748227985075e-05, "loss": 1.4173, "step": 19884 }, { "epoch": 0.7121241964653429, "grad_norm": 1.3899900913238525, "learning_rate": 4.040343370468098e-05, "loss": 1.3588, "step": 19885 }, { "epoch": 0.7121600085949111, "grad_norm": 1.430406928062439, "learning_rate": 4.039411998317182e-05, "loss": 1.6367, "step": 19886 }, { "epoch": 0.7121958207244794, "grad_norm": 1.4435672760009766, "learning_rate": 4.038480706358287e-05, "loss": 1.3293, "step": 19887 }, { "epoch": 0.7122316328540477, "grad_norm": 1.3121012449264526, "learning_rate": 4.0375494946039495e-05, "loss": 1.0906, "step": 19888 }, { "epoch": 0.7122674449836159, "grad_norm": 1.6354104280471802, "learning_rate": 4.0366183630666885e-05, "loss": 1.4199, "step": 19889 }, { "epoch": 0.7123032571131842, "grad_norm": 1.617457389831543, "learning_rate": 4.035687311759036e-05, "loss": 1.2094, "step": 19890 }, { "epoch": 0.7123390692427525, "grad_norm": 2.983320713043213, "learning_rate": 4.03475634069352e-05, "loss": 1.2032, "step": 19891 }, { "epoch": 0.7123748813723209, "grad_norm": 1.9741209745407104, "learning_rate": 4.033825449882659e-05, "loss": 1.6208, "step": 19892 }, { "epoch": 0.7124106935018891, "grad_norm": 1.3680564165115356, "learning_rate": 4.032894639338981e-05, "loss": 1.7192, "step": 19893 }, { "epoch": 0.7124465056314574, "grad_norm": 1.6451799869537354, "learning_rate": 4.031963909075009e-05, "loss": 1.2906, "step": 19894 }, { "epoch": 0.7124823177610257, "grad_norm": 1.493516206741333, "learning_rate": 4.0310332591032675e-05, "loss": 1.4103, "step": 19895 }, { "epoch": 0.7125181298905939, "grad_norm": 1.9640635251998901, "learning_rate": 4.030102689436271e-05, "loss": 1.3533, "step": 19896 }, { "epoch": 0.7125539420201622, "grad_norm": 1.6041368246078491, "learning_rate": 4.0291722000865416e-05, "loss": 1.2857, "step": 19897 }, { "epoch": 0.7125897541497305, "grad_norm": 1.584389090538025, "learning_rate": 4.0282417910666025e-05, "loss": 1.5981, "step": 19898 }, { "epoch": 0.7126255662792988, "grad_norm": 1.9366203546524048, "learning_rate": 4.027311462388964e-05, "loss": 1.6245, "step": 19899 }, { "epoch": 0.7126613784088671, "grad_norm": 1.880853533744812, "learning_rate": 4.026381214066145e-05, "loss": 1.358, "step": 19900 }, { "epoch": 0.7126971905384354, "grad_norm": 1.7722489833831787, "learning_rate": 4.025451046110661e-05, "loss": 1.5929, "step": 19901 }, { "epoch": 0.7127330026680037, "grad_norm": 1.6037089824676514, "learning_rate": 4.024520958535031e-05, "loss": 1.5791, "step": 19902 }, { "epoch": 0.7127688147975719, "grad_norm": 2.314192295074463, "learning_rate": 4.023590951351759e-05, "loss": 1.5203, "step": 19903 }, { "epoch": 0.7128046269271402, "grad_norm": 2.308858633041382, "learning_rate": 4.022661024573362e-05, "loss": 1.2871, "step": 19904 }, { "epoch": 0.7128404390567085, "grad_norm": 1.604375958442688, "learning_rate": 4.0217311782123514e-05, "loss": 1.7423, "step": 19905 }, { "epoch": 0.7128762511862768, "grad_norm": 1.2021867036819458, "learning_rate": 4.020801412281239e-05, "loss": 1.246, "step": 19906 }, { "epoch": 0.7129120633158451, "grad_norm": 2.0452868938446045, "learning_rate": 4.019871726792528e-05, "loss": 1.5018, "step": 19907 }, { "epoch": 0.7129478754454134, "grad_norm": 1.4524165391921997, "learning_rate": 4.0189421217587297e-05, "loss": 1.4838, "step": 19908 }, { "epoch": 0.7129836875749817, "grad_norm": 1.3504445552825928, "learning_rate": 4.0180125971923524e-05, "loss": 1.5034, "step": 19909 }, { "epoch": 0.7130194997045499, "grad_norm": 2.032801389694214, "learning_rate": 4.017083153105897e-05, "loss": 1.5505, "step": 19910 }, { "epoch": 0.7130553118341182, "grad_norm": 1.6039384603500366, "learning_rate": 4.0161537895118695e-05, "loss": 1.3706, "step": 19911 }, { "epoch": 0.7130911239636865, "grad_norm": 1.8873577117919922, "learning_rate": 4.0152245064227745e-05, "loss": 1.2697, "step": 19912 }, { "epoch": 0.7131269360932548, "grad_norm": 1.6298884153366089, "learning_rate": 4.0142953038511176e-05, "loss": 1.422, "step": 19913 }, { "epoch": 0.7131627482228231, "grad_norm": 1.6877508163452148, "learning_rate": 4.013366181809393e-05, "loss": 1.2018, "step": 19914 }, { "epoch": 0.7131985603523914, "grad_norm": 1.8622572422027588, "learning_rate": 4.0124371403101034e-05, "loss": 1.6352, "step": 19915 }, { "epoch": 0.7132343724819596, "grad_norm": 2.3264732360839844, "learning_rate": 4.0115081793657525e-05, "loss": 1.5532, "step": 19916 }, { "epoch": 0.7132701846115279, "grad_norm": 2.014953374862671, "learning_rate": 4.010579298988832e-05, "loss": 1.7364, "step": 19917 }, { "epoch": 0.7133059967410962, "grad_norm": 2.2904231548309326, "learning_rate": 4.00965049919184e-05, "loss": 1.2316, "step": 19918 }, { "epoch": 0.7133418088706645, "grad_norm": 1.9575514793395996, "learning_rate": 4.0087217799872746e-05, "loss": 1.4247, "step": 19919 }, { "epoch": 0.7133776210002328, "grad_norm": 1.7283276319503784, "learning_rate": 4.007793141387633e-05, "loss": 1.4632, "step": 19920 }, { "epoch": 0.7134134331298011, "grad_norm": 1.2572942972183228, "learning_rate": 4.0068645834054e-05, "loss": 1.251, "step": 19921 }, { "epoch": 0.7134492452593694, "grad_norm": 1.8809939622879028, "learning_rate": 4.0059361060530755e-05, "loss": 1.8018, "step": 19922 }, { "epoch": 0.7134850573889376, "grad_norm": 2.7003350257873535, "learning_rate": 4.005007709343147e-05, "loss": 1.3071, "step": 19923 }, { "epoch": 0.7135208695185059, "grad_norm": 1.7158159017562866, "learning_rate": 4.004079393288112e-05, "loss": 1.5144, "step": 19924 }, { "epoch": 0.7135566816480742, "grad_norm": 1.4738690853118896, "learning_rate": 4.00315115790045e-05, "loss": 1.6998, "step": 19925 }, { "epoch": 0.7135924937776424, "grad_norm": 1.3729649782180786, "learning_rate": 4.002223003192654e-05, "loss": 1.2989, "step": 19926 }, { "epoch": 0.7136283059072108, "grad_norm": 1.5292845964431763, "learning_rate": 4.001294929177215e-05, "loss": 1.683, "step": 19927 }, { "epoch": 0.7136641180367791, "grad_norm": 1.4550150632858276, "learning_rate": 4.0003669358666106e-05, "loss": 1.4341, "step": 19928 }, { "epoch": 0.7136999301663474, "grad_norm": 1.1493412256240845, "learning_rate": 3.9994390232733304e-05, "loss": 1.3642, "step": 19929 }, { "epoch": 0.7137357422959156, "grad_norm": 1.5714877843856812, "learning_rate": 3.9985111914098585e-05, "loss": 1.3609, "step": 19930 }, { "epoch": 0.7137715544254839, "grad_norm": 1.6583268642425537, "learning_rate": 3.99758344028868e-05, "loss": 1.5051, "step": 19931 }, { "epoch": 0.7138073665550522, "grad_norm": 2.281008005142212, "learning_rate": 3.99665576992227e-05, "loss": 1.8181, "step": 19932 }, { "epoch": 0.7138431786846204, "grad_norm": 1.5775352716445923, "learning_rate": 3.995728180323114e-05, "loss": 1.5004, "step": 19933 }, { "epoch": 0.7138789908141888, "grad_norm": 1.927957534790039, "learning_rate": 3.994800671503694e-05, "loss": 1.4526, "step": 19934 }, { "epoch": 0.7139148029437571, "grad_norm": 1.4535878896713257, "learning_rate": 3.9938732434764805e-05, "loss": 1.5883, "step": 19935 }, { "epoch": 0.7139506150733254, "grad_norm": 1.65828537940979, "learning_rate": 3.992945896253958e-05, "loss": 1.6344, "step": 19936 }, { "epoch": 0.7139864272028936, "grad_norm": 1.9846526384353638, "learning_rate": 3.992018629848594e-05, "loss": 1.3487, "step": 19937 }, { "epoch": 0.7140222393324619, "grad_norm": 1.7143404483795166, "learning_rate": 3.991091444272876e-05, "loss": 1.2854, "step": 19938 }, { "epoch": 0.7140580514620302, "grad_norm": 1.7557034492492676, "learning_rate": 3.9901643395392685e-05, "loss": 1.5111, "step": 19939 }, { "epoch": 0.7140938635915984, "grad_norm": 1.5067017078399658, "learning_rate": 3.989237315660248e-05, "loss": 1.4103, "step": 19940 }, { "epoch": 0.7141296757211668, "grad_norm": 2.0715649127960205, "learning_rate": 3.988310372648285e-05, "loss": 1.4079, "step": 19941 }, { "epoch": 0.7141654878507351, "grad_norm": 1.3460463285446167, "learning_rate": 3.9873835105158564e-05, "loss": 1.5776, "step": 19942 }, { "epoch": 0.7142012999803034, "grad_norm": 1.6910752058029175, "learning_rate": 3.9864567292754266e-05, "loss": 1.6837, "step": 19943 }, { "epoch": 0.7142371121098716, "grad_norm": 2.667029619216919, "learning_rate": 3.985530028939456e-05, "loss": 1.3131, "step": 19944 }, { "epoch": 0.7142729242394399, "grad_norm": 1.554051160812378, "learning_rate": 3.9846034095204285e-05, "loss": 1.3344, "step": 19945 }, { "epoch": 0.7143087363690082, "grad_norm": 1.4796596765518188, "learning_rate": 3.9836768710308e-05, "loss": 1.6145, "step": 19946 }, { "epoch": 0.7143445484985764, "grad_norm": 1.7634326219558716, "learning_rate": 3.982750413483043e-05, "loss": 1.2404, "step": 19947 }, { "epoch": 0.7143803606281448, "grad_norm": 1.5787911415100098, "learning_rate": 3.981824036889609e-05, "loss": 1.3975, "step": 19948 }, { "epoch": 0.7144161727577131, "grad_norm": 1.527350902557373, "learning_rate": 3.9808977412629764e-05, "loss": 1.4577, "step": 19949 }, { "epoch": 0.7144519848872813, "grad_norm": 1.693781852722168, "learning_rate": 3.979971526615598e-05, "loss": 1.1326, "step": 19950 }, { "epoch": 0.7144877970168496, "grad_norm": 1.5585476160049438, "learning_rate": 3.9790453929599384e-05, "loss": 1.2612, "step": 19951 }, { "epoch": 0.7145236091464179, "grad_norm": 1.7941901683807373, "learning_rate": 3.978119340308458e-05, "loss": 1.5067, "step": 19952 }, { "epoch": 0.7145594212759862, "grad_norm": 1.4558805227279663, "learning_rate": 3.977193368673612e-05, "loss": 1.2185, "step": 19953 }, { "epoch": 0.7145952334055544, "grad_norm": 1.8235039710998535, "learning_rate": 3.976267478067863e-05, "loss": 1.4494, "step": 19954 }, { "epoch": 0.7146310455351228, "grad_norm": 2.5660204887390137, "learning_rate": 3.975341668503659e-05, "loss": 1.5131, "step": 19955 }, { "epoch": 0.7146668576646911, "grad_norm": 2.2466602325439453, "learning_rate": 3.9744159399934676e-05, "loss": 1.6216, "step": 19956 }, { "epoch": 0.7147026697942593, "grad_norm": 1.3325560092926025, "learning_rate": 3.973490292549735e-05, "loss": 1.478, "step": 19957 }, { "epoch": 0.7147384819238276, "grad_norm": 1.7814619541168213, "learning_rate": 3.97256472618492e-05, "loss": 1.5762, "step": 19958 }, { "epoch": 0.7147742940533959, "grad_norm": 1.7509052753448486, "learning_rate": 3.971639240911468e-05, "loss": 1.2004, "step": 19959 }, { "epoch": 0.7148101061829641, "grad_norm": 1.4850753545761108, "learning_rate": 3.970713836741834e-05, "loss": 1.1819, "step": 19960 }, { "epoch": 0.7148459183125324, "grad_norm": 1.3009765148162842, "learning_rate": 3.9697885136884716e-05, "loss": 1.5121, "step": 19961 }, { "epoch": 0.7148817304421008, "grad_norm": 1.357944369316101, "learning_rate": 3.968863271763822e-05, "loss": 1.4413, "step": 19962 }, { "epoch": 0.7149175425716691, "grad_norm": 1.5240039825439453, "learning_rate": 3.967938110980338e-05, "loss": 1.3346, "step": 19963 }, { "epoch": 0.7149533547012373, "grad_norm": 1.7359546422958374, "learning_rate": 3.9670130313504675e-05, "loss": 1.5019, "step": 19964 }, { "epoch": 0.7149891668308056, "grad_norm": 1.9136874675750732, "learning_rate": 3.9660880328866556e-05, "loss": 1.3177, "step": 19965 }, { "epoch": 0.7150249789603739, "grad_norm": 1.4809314012527466, "learning_rate": 3.96516311560134e-05, "loss": 1.5468, "step": 19966 }, { "epoch": 0.7150607910899421, "grad_norm": 1.871216058731079, "learning_rate": 3.964238279506979e-05, "loss": 1.6134, "step": 19967 }, { "epoch": 0.7150966032195104, "grad_norm": 1.5043405294418335, "learning_rate": 3.963313524616005e-05, "loss": 1.3557, "step": 19968 }, { "epoch": 0.7151324153490788, "grad_norm": 1.8438221216201782, "learning_rate": 3.962388850940857e-05, "loss": 1.5986, "step": 19969 }, { "epoch": 0.7151682274786471, "grad_norm": 1.7853418588638306, "learning_rate": 3.9614642584939784e-05, "loss": 1.4494, "step": 19970 }, { "epoch": 0.7152040396082153, "grad_norm": 1.7872045040130615, "learning_rate": 3.96053974728781e-05, "loss": 1.6464, "step": 19971 }, { "epoch": 0.7152398517377836, "grad_norm": 1.82921302318573, "learning_rate": 3.9596153173347925e-05, "loss": 1.6829, "step": 19972 }, { "epoch": 0.7152756638673519, "grad_norm": 1.4460041522979736, "learning_rate": 3.958690968647356e-05, "loss": 1.5682, "step": 19973 }, { "epoch": 0.7153114759969201, "grad_norm": 2.098288059234619, "learning_rate": 3.9577667012379395e-05, "loss": 1.6599, "step": 19974 }, { "epoch": 0.7153472881264884, "grad_norm": 1.5381921529769897, "learning_rate": 3.956842515118978e-05, "loss": 1.543, "step": 19975 }, { "epoch": 0.7153831002560568, "grad_norm": 1.5164440870285034, "learning_rate": 3.955918410302909e-05, "loss": 1.3406, "step": 19976 }, { "epoch": 0.715418912385625, "grad_norm": 1.4968868494033813, "learning_rate": 3.954994386802158e-05, "loss": 1.4984, "step": 19977 }, { "epoch": 0.7154547245151933, "grad_norm": 1.6591699123382568, "learning_rate": 3.95407044462916e-05, "loss": 1.326, "step": 19978 }, { "epoch": 0.7154905366447616, "grad_norm": 1.5657908916473389, "learning_rate": 3.953146583796349e-05, "loss": 1.2784, "step": 19979 }, { "epoch": 0.7155263487743299, "grad_norm": 1.5644398927688599, "learning_rate": 3.952222804316148e-05, "loss": 1.7664, "step": 19980 }, { "epoch": 0.7155621609038981, "grad_norm": 1.6657533645629883, "learning_rate": 3.9512991062009874e-05, "loss": 1.3903, "step": 19981 }, { "epoch": 0.7155979730334664, "grad_norm": 1.300279140472412, "learning_rate": 3.9503754894632947e-05, "loss": 1.5107, "step": 19982 }, { "epoch": 0.7156337851630348, "grad_norm": 1.9345062971115112, "learning_rate": 3.949451954115501e-05, "loss": 1.638, "step": 19983 }, { "epoch": 0.715669597292603, "grad_norm": 1.9588655233383179, "learning_rate": 3.948528500170021e-05, "loss": 1.377, "step": 19984 }, { "epoch": 0.7157054094221713, "grad_norm": 1.5046672821044922, "learning_rate": 3.9476051276392853e-05, "loss": 1.4962, "step": 19985 }, { "epoch": 0.7157412215517396, "grad_norm": 1.284699559211731, "learning_rate": 3.946681836535721e-05, "loss": 1.0213, "step": 19986 }, { "epoch": 0.7157770336813079, "grad_norm": 1.8973195552825928, "learning_rate": 3.945758626871738e-05, "loss": 1.608, "step": 19987 }, { "epoch": 0.7158128458108761, "grad_norm": 1.9553606510162354, "learning_rate": 3.9448354986597645e-05, "loss": 1.344, "step": 19988 }, { "epoch": 0.7158486579404444, "grad_norm": 1.8713107109069824, "learning_rate": 3.943912451912219e-05, "loss": 1.4795, "step": 19989 }, { "epoch": 0.7158844700700128, "grad_norm": 2.1087417602539062, "learning_rate": 3.9429894866415226e-05, "loss": 1.3713, "step": 19990 }, { "epoch": 0.715920282199581, "grad_norm": 1.916988492012024, "learning_rate": 3.9420666028600874e-05, "loss": 1.7671, "step": 19991 }, { "epoch": 0.7159560943291493, "grad_norm": 1.5088728666305542, "learning_rate": 3.9411438005803305e-05, "loss": 1.2984, "step": 19992 }, { "epoch": 0.7159919064587176, "grad_norm": 1.6458569765090942, "learning_rate": 3.9402210798146686e-05, "loss": 1.4498, "step": 19993 }, { "epoch": 0.7160277185882858, "grad_norm": 1.7323598861694336, "learning_rate": 3.939298440575519e-05, "loss": 1.6207, "step": 19994 }, { "epoch": 0.7160635307178541, "grad_norm": 1.744994044303894, "learning_rate": 3.9383758828752884e-05, "loss": 1.537, "step": 19995 }, { "epoch": 0.7160993428474224, "grad_norm": 1.408331036567688, "learning_rate": 3.93745340672639e-05, "loss": 1.5115, "step": 19996 }, { "epoch": 0.7161351549769908, "grad_norm": 1.3587737083435059, "learning_rate": 3.936531012141241e-05, "loss": 1.3417, "step": 19997 }, { "epoch": 0.716170967106559, "grad_norm": 1.5102040767669678, "learning_rate": 3.935608699132242e-05, "loss": 1.1886, "step": 19998 }, { "epoch": 0.7162067792361273, "grad_norm": 1.4365791082382202, "learning_rate": 3.9346864677118046e-05, "loss": 1.4104, "step": 19999 }, { "epoch": 0.7162425913656956, "grad_norm": 1.6736547946929932, "learning_rate": 3.9337643178923376e-05, "loss": 1.2907, "step": 20000 }, { "epoch": 0.7162784034952638, "grad_norm": 1.6852706670761108, "learning_rate": 3.932842249686251e-05, "loss": 1.5204, "step": 20001 }, { "epoch": 0.7163142156248321, "grad_norm": 1.6308528184890747, "learning_rate": 3.9319202631059414e-05, "loss": 1.3969, "step": 20002 }, { "epoch": 0.7163500277544004, "grad_norm": 1.8392781019210815, "learning_rate": 3.9309983581638173e-05, "loss": 1.5858, "step": 20003 }, { "epoch": 0.7163858398839686, "grad_norm": 1.9511610269546509, "learning_rate": 3.9300765348722854e-05, "loss": 1.1621, "step": 20004 }, { "epoch": 0.716421652013537, "grad_norm": 1.9499866962432861, "learning_rate": 3.929154793243741e-05, "loss": 1.5144, "step": 20005 }, { "epoch": 0.7164574641431053, "grad_norm": 1.5823554992675781, "learning_rate": 3.928233133290589e-05, "loss": 1.5373, "step": 20006 }, { "epoch": 0.7164932762726736, "grad_norm": 2.0642759799957275, "learning_rate": 3.927311555025227e-05, "loss": 1.6982, "step": 20007 }, { "epoch": 0.7165290884022418, "grad_norm": 1.6306183338165283, "learning_rate": 3.926390058460058e-05, "loss": 1.4509, "step": 20008 }, { "epoch": 0.7165649005318101, "grad_norm": 1.752936601638794, "learning_rate": 3.925468643607473e-05, "loss": 1.5816, "step": 20009 }, { "epoch": 0.7166007126613784, "grad_norm": 1.5219781398773193, "learning_rate": 3.9245473104798726e-05, "loss": 1.3056, "step": 20010 }, { "epoch": 0.7166365247909466, "grad_norm": 1.620758295059204, "learning_rate": 3.923626059089651e-05, "loss": 1.2589, "step": 20011 }, { "epoch": 0.716672336920515, "grad_norm": 1.6076922416687012, "learning_rate": 3.9227048894492055e-05, "loss": 1.6003, "step": 20012 }, { "epoch": 0.7167081490500833, "grad_norm": 1.5931798219680786, "learning_rate": 3.921783801570924e-05, "loss": 1.341, "step": 20013 }, { "epoch": 0.7167439611796516, "grad_norm": 1.723936676979065, "learning_rate": 3.9208627954672014e-05, "loss": 1.4163, "step": 20014 }, { "epoch": 0.7167797733092198, "grad_norm": 1.620898962020874, "learning_rate": 3.9199418711504307e-05, "loss": 1.3572, "step": 20015 }, { "epoch": 0.7168155854387881, "grad_norm": 1.4487621784210205, "learning_rate": 3.919021028632998e-05, "loss": 1.5906, "step": 20016 }, { "epoch": 0.7168513975683564, "grad_norm": 1.5113482475280762, "learning_rate": 3.918100267927292e-05, "loss": 1.6878, "step": 20017 }, { "epoch": 0.7168872096979246, "grad_norm": 1.350650668144226, "learning_rate": 3.917179589045701e-05, "loss": 1.3855, "step": 20018 }, { "epoch": 0.716923021827493, "grad_norm": 2.0095953941345215, "learning_rate": 3.9162589920006164e-05, "loss": 1.6719, "step": 20019 }, { "epoch": 0.7169588339570613, "grad_norm": 2.0317280292510986, "learning_rate": 3.9153384768044163e-05, "loss": 1.2714, "step": 20020 }, { "epoch": 0.7169946460866296, "grad_norm": 1.5975542068481445, "learning_rate": 3.9144180434694885e-05, "loss": 1.4929, "step": 20021 }, { "epoch": 0.7170304582161978, "grad_norm": 1.5322084426879883, "learning_rate": 3.91349769200822e-05, "loss": 1.0812, "step": 20022 }, { "epoch": 0.7170662703457661, "grad_norm": 1.5115982294082642, "learning_rate": 3.9125774224329845e-05, "loss": 1.4755, "step": 20023 }, { "epoch": 0.7171020824753344, "grad_norm": 1.715003252029419, "learning_rate": 3.911657234756169e-05, "loss": 1.4666, "step": 20024 }, { "epoch": 0.7171378946049026, "grad_norm": 1.540813684463501, "learning_rate": 3.9107371289901504e-05, "loss": 1.4246, "step": 20025 }, { "epoch": 0.717173706734471, "grad_norm": 1.781424880027771, "learning_rate": 3.909817105147314e-05, "loss": 1.3314, "step": 20026 }, { "epoch": 0.7172095188640393, "grad_norm": 1.4089975357055664, "learning_rate": 3.9088971632400286e-05, "loss": 1.2955, "step": 20027 }, { "epoch": 0.7172453309936075, "grad_norm": 1.7052937746047974, "learning_rate": 3.907977303280674e-05, "loss": 1.6117, "step": 20028 }, { "epoch": 0.7172811431231758, "grad_norm": 1.4896774291992188, "learning_rate": 3.907057525281628e-05, "loss": 1.4082, "step": 20029 }, { "epoch": 0.7173169552527441, "grad_norm": 1.4978755712509155, "learning_rate": 3.906137829255266e-05, "loss": 1.3267, "step": 20030 }, { "epoch": 0.7173527673823124, "grad_norm": 1.3901692628860474, "learning_rate": 3.90521821521396e-05, "loss": 1.1466, "step": 20031 }, { "epoch": 0.7173885795118806, "grad_norm": 1.3833082914352417, "learning_rate": 3.904298683170074e-05, "loss": 1.577, "step": 20032 }, { "epoch": 0.717424391641449, "grad_norm": 1.5521522760391235, "learning_rate": 3.903379233135994e-05, "loss": 1.2189, "step": 20033 }, { "epoch": 0.7174602037710173, "grad_norm": 1.3972352743148804, "learning_rate": 3.9024598651240774e-05, "loss": 1.5434, "step": 20034 }, { "epoch": 0.7174960159005855, "grad_norm": 1.4136375188827515, "learning_rate": 3.901540579146698e-05, "loss": 1.4805, "step": 20035 }, { "epoch": 0.7175318280301538, "grad_norm": 1.4638866186141968, "learning_rate": 3.900621375216226e-05, "loss": 1.5089, "step": 20036 }, { "epoch": 0.7175676401597221, "grad_norm": 1.317194938659668, "learning_rate": 3.8997022533450264e-05, "loss": 1.3669, "step": 20037 }, { "epoch": 0.7176034522892903, "grad_norm": 2.1901493072509766, "learning_rate": 3.898783213545463e-05, "loss": 1.1969, "step": 20038 }, { "epoch": 0.7176392644188586, "grad_norm": 1.7815966606140137, "learning_rate": 3.8978642558298994e-05, "loss": 1.6605, "step": 20039 }, { "epoch": 0.717675076548427, "grad_norm": 1.8504796028137207, "learning_rate": 3.8969453802107057e-05, "loss": 1.2785, "step": 20040 }, { "epoch": 0.7177108886779953, "grad_norm": 1.529250144958496, "learning_rate": 3.8960265867002364e-05, "loss": 1.6428, "step": 20041 }, { "epoch": 0.7177467008075635, "grad_norm": 1.3686065673828125, "learning_rate": 3.895107875310858e-05, "loss": 1.1473, "step": 20042 }, { "epoch": 0.7177825129371318, "grad_norm": 2.0113461017608643, "learning_rate": 3.894189246054922e-05, "loss": 1.4563, "step": 20043 }, { "epoch": 0.7178183250667001, "grad_norm": 1.429203748703003, "learning_rate": 3.893270698944802e-05, "loss": 1.3154, "step": 20044 }, { "epoch": 0.7178541371962683, "grad_norm": 2.117223024368286, "learning_rate": 3.892352233992843e-05, "loss": 1.5677, "step": 20045 }, { "epoch": 0.7178899493258366, "grad_norm": 1.57451331615448, "learning_rate": 3.89143385121141e-05, "loss": 1.3936, "step": 20046 }, { "epoch": 0.717925761455405, "grad_norm": 1.4294334650039673, "learning_rate": 3.8905155506128476e-05, "loss": 1.4473, "step": 20047 }, { "epoch": 0.7179615735849733, "grad_norm": 1.9604945182800293, "learning_rate": 3.889597332209526e-05, "loss": 0.9822, "step": 20048 }, { "epoch": 0.7179973857145415, "grad_norm": 1.716846227645874, "learning_rate": 3.888679196013789e-05, "loss": 1.7099, "step": 20049 }, { "epoch": 0.7180331978441098, "grad_norm": 1.586357831954956, "learning_rate": 3.887761142037984e-05, "loss": 1.4334, "step": 20050 }, { "epoch": 0.7180690099736781, "grad_norm": 1.982703685760498, "learning_rate": 3.886843170294475e-05, "loss": 1.7308, "step": 20051 }, { "epoch": 0.7181048221032463, "grad_norm": 2.7019457817077637, "learning_rate": 3.8859252807956035e-05, "loss": 1.4159, "step": 20052 }, { "epoch": 0.7181406342328146, "grad_norm": 1.6056925058364868, "learning_rate": 3.885007473553723e-05, "loss": 1.2037, "step": 20053 }, { "epoch": 0.718176446362383, "grad_norm": 1.4310388565063477, "learning_rate": 3.8840897485811737e-05, "loss": 1.2534, "step": 20054 }, { "epoch": 0.7182122584919512, "grad_norm": 1.3919428586959839, "learning_rate": 3.883172105890314e-05, "loss": 1.6811, "step": 20055 }, { "epoch": 0.7182480706215195, "grad_norm": 1.4286328554153442, "learning_rate": 3.8822545454934836e-05, "loss": 1.3516, "step": 20056 }, { "epoch": 0.7182838827510878, "grad_norm": 1.5007710456848145, "learning_rate": 3.881337067403022e-05, "loss": 1.6128, "step": 20057 }, { "epoch": 0.7183196948806561, "grad_norm": 1.7567851543426514, "learning_rate": 3.8804196716312805e-05, "loss": 1.6251, "step": 20058 }, { "epoch": 0.7183555070102243, "grad_norm": 1.5256688594818115, "learning_rate": 3.879502358190596e-05, "loss": 1.5159, "step": 20059 }, { "epoch": 0.7183913191397926, "grad_norm": 1.9770673513412476, "learning_rate": 3.878585127093317e-05, "loss": 1.393, "step": 20060 }, { "epoch": 0.718427131269361, "grad_norm": 1.4122111797332764, "learning_rate": 3.877667978351772e-05, "loss": 1.4472, "step": 20061 }, { "epoch": 0.7184629433989292, "grad_norm": 1.5489094257354736, "learning_rate": 3.876750911978315e-05, "loss": 1.3976, "step": 20062 }, { "epoch": 0.7184987555284975, "grad_norm": 1.4532663822174072, "learning_rate": 3.875833927985272e-05, "loss": 1.2297, "step": 20063 }, { "epoch": 0.7185345676580658, "grad_norm": 1.3802516460418701, "learning_rate": 3.8749170263849865e-05, "loss": 1.1974, "step": 20064 }, { "epoch": 0.718570379787634, "grad_norm": 1.4403727054595947, "learning_rate": 3.874000207189789e-05, "loss": 1.4378, "step": 20065 }, { "epoch": 0.7186061919172023, "grad_norm": 1.9045497179031372, "learning_rate": 3.8730834704120164e-05, "loss": 1.7718, "step": 20066 }, { "epoch": 0.7186420040467706, "grad_norm": 1.5778992176055908, "learning_rate": 3.8721668160640054e-05, "loss": 1.6047, "step": 20067 }, { "epoch": 0.718677816176339, "grad_norm": 1.8331999778747559, "learning_rate": 3.871250244158083e-05, "loss": 1.6248, "step": 20068 }, { "epoch": 0.7187136283059072, "grad_norm": 1.9123164415359497, "learning_rate": 3.870333754706583e-05, "loss": 1.5269, "step": 20069 }, { "epoch": 0.7187494404354755, "grad_norm": 2.063952922821045, "learning_rate": 3.8694173477218355e-05, "loss": 1.7413, "step": 20070 }, { "epoch": 0.7187852525650438, "grad_norm": 1.6923632621765137, "learning_rate": 3.8685010232161736e-05, "loss": 1.0536, "step": 20071 }, { "epoch": 0.718821064694612, "grad_norm": 1.7924041748046875, "learning_rate": 3.8675847812019175e-05, "loss": 1.3545, "step": 20072 }, { "epoch": 0.7188568768241803, "grad_norm": 1.7178550958633423, "learning_rate": 3.866668621691397e-05, "loss": 1.3935, "step": 20073 }, { "epoch": 0.7188926889537486, "grad_norm": 1.6161495447158813, "learning_rate": 3.8657525446969436e-05, "loss": 1.3834, "step": 20074 }, { "epoch": 0.718928501083317, "grad_norm": 2.147392988204956, "learning_rate": 3.864836550230874e-05, "loss": 1.6886, "step": 20075 }, { "epoch": 0.7189643132128852, "grad_norm": 1.8922128677368164, "learning_rate": 3.863920638305512e-05, "loss": 1.468, "step": 20076 }, { "epoch": 0.7190001253424535, "grad_norm": 1.7388283014297485, "learning_rate": 3.863004808933186e-05, "loss": 1.554, "step": 20077 }, { "epoch": 0.7190359374720218, "grad_norm": 1.6552281379699707, "learning_rate": 3.8620890621262164e-05, "loss": 1.6504, "step": 20078 }, { "epoch": 0.71907174960159, "grad_norm": 1.4679269790649414, "learning_rate": 3.8611733978969176e-05, "loss": 1.451, "step": 20079 }, { "epoch": 0.7191075617311583, "grad_norm": 1.8459839820861816, "learning_rate": 3.860257816257612e-05, "loss": 1.4404, "step": 20080 }, { "epoch": 0.7191433738607266, "grad_norm": 1.5493810176849365, "learning_rate": 3.859342317220619e-05, "loss": 1.3396, "step": 20081 }, { "epoch": 0.719179185990295, "grad_norm": 1.4488211870193481, "learning_rate": 3.8584269007982565e-05, "loss": 1.5612, "step": 20082 }, { "epoch": 0.7192149981198632, "grad_norm": 2.1801438331604004, "learning_rate": 3.857511567002835e-05, "loss": 1.1745, "step": 20083 }, { "epoch": 0.7192508102494315, "grad_norm": 1.371329665184021, "learning_rate": 3.8565963158466714e-05, "loss": 1.4076, "step": 20084 }, { "epoch": 0.7192866223789998, "grad_norm": 1.5765669345855713, "learning_rate": 3.855681147342084e-05, "loss": 1.4628, "step": 20085 }, { "epoch": 0.719322434508568, "grad_norm": 1.3415297269821167, "learning_rate": 3.854766061501378e-05, "loss": 1.6046, "step": 20086 }, { "epoch": 0.7193582466381363, "grad_norm": 1.9021408557891846, "learning_rate": 3.853851058336867e-05, "loss": 1.4807, "step": 20087 }, { "epoch": 0.7193940587677046, "grad_norm": 1.543630599975586, "learning_rate": 3.852936137860863e-05, "loss": 1.2401, "step": 20088 }, { "epoch": 0.719429870897273, "grad_norm": 1.5069166421890259, "learning_rate": 3.8520213000856763e-05, "loss": 1.6359, "step": 20089 }, { "epoch": 0.7194656830268412, "grad_norm": 1.6449099779129028, "learning_rate": 3.85110654502361e-05, "loss": 1.4342, "step": 20090 }, { "epoch": 0.7195014951564095, "grad_norm": 1.4722683429718018, "learning_rate": 3.8501918726869744e-05, "loss": 1.6538, "step": 20091 }, { "epoch": 0.7195373072859778, "grad_norm": 1.6070823669433594, "learning_rate": 3.8492772830880776e-05, "loss": 1.2967, "step": 20092 }, { "epoch": 0.719573119415546, "grad_norm": 1.3607196807861328, "learning_rate": 3.848362776239217e-05, "loss": 1.559, "step": 20093 }, { "epoch": 0.7196089315451143, "grad_norm": 1.7744579315185547, "learning_rate": 3.847448352152701e-05, "loss": 1.7096, "step": 20094 }, { "epoch": 0.7196447436746826, "grad_norm": 1.426999807357788, "learning_rate": 3.84653401084083e-05, "loss": 1.4129, "step": 20095 }, { "epoch": 0.7196805558042509, "grad_norm": 1.5814954042434692, "learning_rate": 3.8456197523159096e-05, "loss": 1.3991, "step": 20096 }, { "epoch": 0.7197163679338192, "grad_norm": 1.6949788331985474, "learning_rate": 3.844705576590235e-05, "loss": 1.2601, "step": 20097 }, { "epoch": 0.7197521800633875, "grad_norm": 1.731933832168579, "learning_rate": 3.843791483676107e-05, "loss": 1.5995, "step": 20098 }, { "epoch": 0.7197879921929558, "grad_norm": 1.780846357345581, "learning_rate": 3.842877473585823e-05, "loss": 1.2574, "step": 20099 }, { "epoch": 0.719823804322524, "grad_norm": 1.3572626113891602, "learning_rate": 3.841963546331684e-05, "loss": 1.4639, "step": 20100 }, { "epoch": 0.7198596164520923, "grad_norm": 1.2794239521026611, "learning_rate": 3.841049701925978e-05, "loss": 1.2574, "step": 20101 }, { "epoch": 0.7198954285816606, "grad_norm": 1.5886186361312866, "learning_rate": 3.840135940381006e-05, "loss": 1.3844, "step": 20102 }, { "epoch": 0.7199312407112289, "grad_norm": 2.106218099594116, "learning_rate": 3.839222261709061e-05, "loss": 1.3317, "step": 20103 }, { "epoch": 0.7199670528407972, "grad_norm": 2.0230941772460938, "learning_rate": 3.83830866592243e-05, "loss": 1.5562, "step": 20104 }, { "epoch": 0.7200028649703655, "grad_norm": 1.5232678651809692, "learning_rate": 3.8373951530334086e-05, "loss": 1.0045, "step": 20105 }, { "epoch": 0.7200386770999337, "grad_norm": 3.936558961868286, "learning_rate": 3.836481723054286e-05, "loss": 1.4348, "step": 20106 }, { "epoch": 0.720074489229502, "grad_norm": 1.2785394191741943, "learning_rate": 3.835568375997355e-05, "loss": 1.5436, "step": 20107 }, { "epoch": 0.7201103013590703, "grad_norm": 2.008934259414673, "learning_rate": 3.8346551118748967e-05, "loss": 1.3253, "step": 20108 }, { "epoch": 0.7201461134886386, "grad_norm": 1.5482290983200073, "learning_rate": 3.8337419306992e-05, "loss": 1.4049, "step": 20109 }, { "epoch": 0.7201819256182069, "grad_norm": 1.702072262763977, "learning_rate": 3.8328288324825566e-05, "loss": 1.4319, "step": 20110 }, { "epoch": 0.7202177377477752, "grad_norm": 2.1846251487731934, "learning_rate": 3.831915817237243e-05, "loss": 1.3064, "step": 20111 }, { "epoch": 0.7202535498773435, "grad_norm": 1.7581915855407715, "learning_rate": 3.831002884975544e-05, "loss": 1.4488, "step": 20112 }, { "epoch": 0.7202893620069117, "grad_norm": 1.5173016786575317, "learning_rate": 3.830090035709745e-05, "loss": 1.2531, "step": 20113 }, { "epoch": 0.72032517413648, "grad_norm": 1.3958326578140259, "learning_rate": 3.8291772694521285e-05, "loss": 1.5105, "step": 20114 }, { "epoch": 0.7203609862660483, "grad_norm": 2.2686309814453125, "learning_rate": 3.82826458621497e-05, "loss": 1.3909, "step": 20115 }, { "epoch": 0.7203967983956165, "grad_norm": 2.0584423542022705, "learning_rate": 3.82735198601055e-05, "loss": 1.6335, "step": 20116 }, { "epoch": 0.7204326105251849, "grad_norm": 1.203965187072754, "learning_rate": 3.8264394688511466e-05, "loss": 1.2574, "step": 20117 }, { "epoch": 0.7204684226547532, "grad_norm": 1.8112740516662598, "learning_rate": 3.82552703474904e-05, "loss": 1.4215, "step": 20118 }, { "epoch": 0.7205042347843215, "grad_norm": 1.7669355869293213, "learning_rate": 3.8246146837165e-05, "loss": 1.6208, "step": 20119 }, { "epoch": 0.7205400469138897, "grad_norm": 1.4590661525726318, "learning_rate": 3.823702415765803e-05, "loss": 1.3783, "step": 20120 }, { "epoch": 0.720575859043458, "grad_norm": 2.1782760620117188, "learning_rate": 3.822790230909227e-05, "loss": 1.64, "step": 20121 }, { "epoch": 0.7206116711730263, "grad_norm": 1.6818681955337524, "learning_rate": 3.821878129159037e-05, "loss": 1.6294, "step": 20122 }, { "epoch": 0.7206474833025945, "grad_norm": 1.7230101823806763, "learning_rate": 3.8209661105275077e-05, "loss": 1.3779, "step": 20123 }, { "epoch": 0.7206832954321629, "grad_norm": 1.3677054643630981, "learning_rate": 3.820054175026908e-05, "loss": 1.0637, "step": 20124 }, { "epoch": 0.7207191075617312, "grad_norm": 1.5686241388320923, "learning_rate": 3.8191423226695125e-05, "loss": 1.278, "step": 20125 }, { "epoch": 0.7207549196912995, "grad_norm": 1.5883556604385376, "learning_rate": 3.81823055346758e-05, "loss": 1.3986, "step": 20126 }, { "epoch": 0.7207907318208677, "grad_norm": 1.4147443771362305, "learning_rate": 3.817318867433383e-05, "loss": 1.3892, "step": 20127 }, { "epoch": 0.720826543950436, "grad_norm": 1.6232571601867676, "learning_rate": 3.816407264579187e-05, "loss": 1.2004, "step": 20128 }, { "epoch": 0.7208623560800043, "grad_norm": 1.8220059871673584, "learning_rate": 3.8154957449172524e-05, "loss": 1.1985, "step": 20129 }, { "epoch": 0.7208981682095725, "grad_norm": 1.627021074295044, "learning_rate": 3.814584308459849e-05, "loss": 1.3011, "step": 20130 }, { "epoch": 0.7209339803391409, "grad_norm": 1.998015284538269, "learning_rate": 3.8136729552192274e-05, "loss": 1.4227, "step": 20131 }, { "epoch": 0.7209697924687092, "grad_norm": 1.4177651405334473, "learning_rate": 3.812761685207664e-05, "loss": 1.2593, "step": 20132 }, { "epoch": 0.7210056045982774, "grad_norm": 1.8777852058410645, "learning_rate": 3.811850498437407e-05, "loss": 1.3507, "step": 20133 }, { "epoch": 0.7210414167278457, "grad_norm": 2.2260215282440186, "learning_rate": 3.81093939492072e-05, "loss": 1.3991, "step": 20134 }, { "epoch": 0.721077228857414, "grad_norm": 1.6670490503311157, "learning_rate": 3.810028374669859e-05, "loss": 1.2042, "step": 20135 }, { "epoch": 0.7211130409869823, "grad_norm": 2.2959189414978027, "learning_rate": 3.8091174376970876e-05, "loss": 1.3529, "step": 20136 }, { "epoch": 0.7211488531165505, "grad_norm": 1.5722131729125977, "learning_rate": 3.808206584014653e-05, "loss": 1.2084, "step": 20137 }, { "epoch": 0.7211846652461189, "grad_norm": 1.5049318075180054, "learning_rate": 3.807295813634807e-05, "loss": 1.6146, "step": 20138 }, { "epoch": 0.7212204773756872, "grad_norm": 2.4031875133514404, "learning_rate": 3.8063851265698134e-05, "loss": 1.467, "step": 20139 }, { "epoch": 0.7212562895052554, "grad_norm": 1.4516150951385498, "learning_rate": 3.805474522831916e-05, "loss": 1.5926, "step": 20140 }, { "epoch": 0.7212921016348237, "grad_norm": 1.6928277015686035, "learning_rate": 3.804564002433371e-05, "loss": 1.4698, "step": 20141 }, { "epoch": 0.721327913764392, "grad_norm": 2.470627546310425, "learning_rate": 3.8036535653864193e-05, "loss": 1.4727, "step": 20142 }, { "epoch": 0.7213637258939603, "grad_norm": 1.5808488130569458, "learning_rate": 3.8027432117033237e-05, "loss": 1.5674, "step": 20143 }, { "epoch": 0.7213995380235285, "grad_norm": 1.833074688911438, "learning_rate": 3.80183294139632e-05, "loss": 1.4833, "step": 20144 }, { "epoch": 0.7214353501530969, "grad_norm": 1.6991946697235107, "learning_rate": 3.8009227544776595e-05, "loss": 1.6437, "step": 20145 }, { "epoch": 0.7214711622826652, "grad_norm": 1.4071147441864014, "learning_rate": 3.80001265095959e-05, "loss": 0.9343, "step": 20146 }, { "epoch": 0.7215069744122334, "grad_norm": 1.4687845706939697, "learning_rate": 3.799102630854351e-05, "loss": 1.4609, "step": 20147 }, { "epoch": 0.7215427865418017, "grad_norm": 1.7575792074203491, "learning_rate": 3.79819269417419e-05, "loss": 1.5505, "step": 20148 }, { "epoch": 0.72157859867137, "grad_norm": 2.31231689453125, "learning_rate": 3.797282840931339e-05, "loss": 1.5541, "step": 20149 }, { "epoch": 0.7216144108009382, "grad_norm": 1.4037084579467773, "learning_rate": 3.796373071138054e-05, "loss": 1.6367, "step": 20150 }, { "epoch": 0.7216502229305065, "grad_norm": 2.1780078411102295, "learning_rate": 3.795463384806564e-05, "loss": 1.6328, "step": 20151 }, { "epoch": 0.7216860350600749, "grad_norm": 1.7256742715835571, "learning_rate": 3.794553781949114e-05, "loss": 1.3406, "step": 20152 }, { "epoch": 0.7217218471896432, "grad_norm": 1.9494801759719849, "learning_rate": 3.793644262577934e-05, "loss": 1.3391, "step": 20153 }, { "epoch": 0.7217576593192114, "grad_norm": 1.6500424146652222, "learning_rate": 3.7927348267052666e-05, "loss": 1.4641, "step": 20154 }, { "epoch": 0.7217934714487797, "grad_norm": 1.6580140590667725, "learning_rate": 3.791825474343348e-05, "loss": 1.5671, "step": 20155 }, { "epoch": 0.721829283578348, "grad_norm": 1.598800778388977, "learning_rate": 3.790916205504406e-05, "loss": 1.4017, "step": 20156 }, { "epoch": 0.7218650957079162, "grad_norm": 1.5164865255355835, "learning_rate": 3.7900070202006764e-05, "loss": 1.1569, "step": 20157 }, { "epoch": 0.7219009078374845, "grad_norm": 1.3159844875335693, "learning_rate": 3.789097918444394e-05, "loss": 1.4161, "step": 20158 }, { "epoch": 0.7219367199670529, "grad_norm": 1.960456132888794, "learning_rate": 3.78818890024779e-05, "loss": 1.3394, "step": 20159 }, { "epoch": 0.7219725320966212, "grad_norm": 1.6618950366973877, "learning_rate": 3.787279965623085e-05, "loss": 1.719, "step": 20160 }, { "epoch": 0.7220083442261894, "grad_norm": 1.2125014066696167, "learning_rate": 3.786371114582521e-05, "loss": 1.1418, "step": 20161 }, { "epoch": 0.7220441563557577, "grad_norm": 1.914084792137146, "learning_rate": 3.785462347138319e-05, "loss": 1.6195, "step": 20162 }, { "epoch": 0.722079968485326, "grad_norm": 1.5807491540908813, "learning_rate": 3.784553663302701e-05, "loss": 1.5063, "step": 20163 }, { "epoch": 0.7221157806148942, "grad_norm": 1.9150186777114868, "learning_rate": 3.783645063087896e-05, "loss": 1.2985, "step": 20164 }, { "epoch": 0.7221515927444625, "grad_norm": 2.09673810005188, "learning_rate": 3.782736546506128e-05, "loss": 1.5551, "step": 20165 }, { "epoch": 0.7221874048740309, "grad_norm": 1.5515776872634888, "learning_rate": 3.781828113569624e-05, "loss": 1.7333, "step": 20166 }, { "epoch": 0.7222232170035991, "grad_norm": 1.499321460723877, "learning_rate": 3.780919764290599e-05, "loss": 1.4817, "step": 20167 }, { "epoch": 0.7222590291331674, "grad_norm": 1.4306893348693848, "learning_rate": 3.780011498681276e-05, "loss": 1.2433, "step": 20168 }, { "epoch": 0.7222948412627357, "grad_norm": 1.8531023263931274, "learning_rate": 3.779103316753875e-05, "loss": 1.205, "step": 20169 }, { "epoch": 0.722330653392304, "grad_norm": 1.7147332429885864, "learning_rate": 3.778195218520618e-05, "loss": 1.3981, "step": 20170 }, { "epoch": 0.7223664655218722, "grad_norm": 1.5731045007705688, "learning_rate": 3.777287203993716e-05, "loss": 1.5347, "step": 20171 }, { "epoch": 0.7224022776514405, "grad_norm": 1.9253267049789429, "learning_rate": 3.7763792731853865e-05, "loss": 1.7233, "step": 20172 }, { "epoch": 0.7224380897810089, "grad_norm": 1.4065598249435425, "learning_rate": 3.77547142610785e-05, "loss": 1.1908, "step": 20173 }, { "epoch": 0.7224739019105771, "grad_norm": 1.5758453607559204, "learning_rate": 3.774563662773314e-05, "loss": 1.2734, "step": 20174 }, { "epoch": 0.7225097140401454, "grad_norm": 1.502901315689087, "learning_rate": 3.773655983193992e-05, "loss": 1.5944, "step": 20175 }, { "epoch": 0.7225455261697137, "grad_norm": 1.7962548732757568, "learning_rate": 3.772748387382099e-05, "loss": 1.2866, "step": 20176 }, { "epoch": 0.722581338299282, "grad_norm": 2.224817991256714, "learning_rate": 3.7718408753498456e-05, "loss": 1.3286, "step": 20177 }, { "epoch": 0.7226171504288502, "grad_norm": 1.7116045951843262, "learning_rate": 3.770933447109437e-05, "loss": 1.2324, "step": 20178 }, { "epoch": 0.7226529625584185, "grad_norm": 1.5392895936965942, "learning_rate": 3.7700261026730844e-05, "loss": 1.2427, "step": 20179 }, { "epoch": 0.7226887746879869, "grad_norm": 1.7307379245758057, "learning_rate": 3.7691188420529974e-05, "loss": 1.4682, "step": 20180 }, { "epoch": 0.7227245868175551, "grad_norm": 1.6550228595733643, "learning_rate": 3.768211665261375e-05, "loss": 1.5901, "step": 20181 }, { "epoch": 0.7227603989471234, "grad_norm": 1.491673469543457, "learning_rate": 3.7673045723104275e-05, "loss": 1.3454, "step": 20182 }, { "epoch": 0.7227962110766917, "grad_norm": 1.3128697872161865, "learning_rate": 3.7663975632123574e-05, "loss": 1.4408, "step": 20183 }, { "epoch": 0.7228320232062599, "grad_norm": 1.577620267868042, "learning_rate": 3.76549063797937e-05, "loss": 1.1176, "step": 20184 }, { "epoch": 0.7228678353358282, "grad_norm": 2.135578155517578, "learning_rate": 3.7645837966236605e-05, "loss": 1.4422, "step": 20185 }, { "epoch": 0.7229036474653965, "grad_norm": 1.4765774011611938, "learning_rate": 3.763677039157433e-05, "loss": 1.2351, "step": 20186 }, { "epoch": 0.7229394595949649, "grad_norm": 1.3106032609939575, "learning_rate": 3.762770365592887e-05, "loss": 1.3763, "step": 20187 }, { "epoch": 0.7229752717245331, "grad_norm": 1.6912516355514526, "learning_rate": 3.7618637759422236e-05, "loss": 1.4735, "step": 20188 }, { "epoch": 0.7230110838541014, "grad_norm": 1.7040541172027588, "learning_rate": 3.760957270217633e-05, "loss": 1.4154, "step": 20189 }, { "epoch": 0.7230468959836697, "grad_norm": 1.61090087890625, "learning_rate": 3.7600508484313146e-05, "loss": 1.4419, "step": 20190 }, { "epoch": 0.7230827081132379, "grad_norm": 1.7219356298446655, "learning_rate": 3.759144510595467e-05, "loss": 1.5413, "step": 20191 }, { "epoch": 0.7231185202428062, "grad_norm": 1.4904463291168213, "learning_rate": 3.7582382567222754e-05, "loss": 1.0032, "step": 20192 }, { "epoch": 0.7231543323723745, "grad_norm": 1.4005389213562012, "learning_rate": 3.757332086823937e-05, "loss": 1.4532, "step": 20193 }, { "epoch": 0.7231901445019429, "grad_norm": 1.4816468954086304, "learning_rate": 3.756426000912644e-05, "loss": 1.5392, "step": 20194 }, { "epoch": 0.7232259566315111, "grad_norm": 1.534940481185913, "learning_rate": 3.7555199990005874e-05, "loss": 1.1027, "step": 20195 }, { "epoch": 0.7232617687610794, "grad_norm": 1.4121631383895874, "learning_rate": 3.754614081099952e-05, "loss": 1.1582, "step": 20196 }, { "epoch": 0.7232975808906477, "grad_norm": 1.200635552406311, "learning_rate": 3.753708247222928e-05, "loss": 1.3734, "step": 20197 }, { "epoch": 0.7233333930202159, "grad_norm": 1.5664600133895874, "learning_rate": 3.752802497381706e-05, "loss": 1.4627, "step": 20198 }, { "epoch": 0.7233692051497842, "grad_norm": 1.6560693979263306, "learning_rate": 3.751896831588464e-05, "loss": 1.3806, "step": 20199 }, { "epoch": 0.7234050172793525, "grad_norm": 1.5157887935638428, "learning_rate": 3.7509912498553914e-05, "loss": 1.495, "step": 20200 }, { "epoch": 0.7234408294089208, "grad_norm": 2.2800793647766113, "learning_rate": 3.750085752194671e-05, "loss": 1.3048, "step": 20201 }, { "epoch": 0.7234766415384891, "grad_norm": 1.5321568250656128, "learning_rate": 3.749180338618488e-05, "loss": 1.5647, "step": 20202 }, { "epoch": 0.7235124536680574, "grad_norm": 1.694242238998413, "learning_rate": 3.7482750091390176e-05, "loss": 1.4978, "step": 20203 }, { "epoch": 0.7235482657976257, "grad_norm": 1.7178481817245483, "learning_rate": 3.7473697637684416e-05, "loss": 1.3819, "step": 20204 }, { "epoch": 0.7235840779271939, "grad_norm": 1.4250034093856812, "learning_rate": 3.746464602518941e-05, "loss": 1.4255, "step": 20205 }, { "epoch": 0.7236198900567622, "grad_norm": 1.6631689071655273, "learning_rate": 3.745559525402696e-05, "loss": 1.5659, "step": 20206 }, { "epoch": 0.7236557021863305, "grad_norm": 1.8999367952346802, "learning_rate": 3.744654532431876e-05, "loss": 1.5464, "step": 20207 }, { "epoch": 0.7236915143158988, "grad_norm": 1.983514428138733, "learning_rate": 3.743749623618661e-05, "loss": 1.4633, "step": 20208 }, { "epoch": 0.7237273264454671, "grad_norm": 2.2431626319885254, "learning_rate": 3.742844798975229e-05, "loss": 1.4338, "step": 20209 }, { "epoch": 0.7237631385750354, "grad_norm": 2.3390116691589355, "learning_rate": 3.7419400585137444e-05, "loss": 1.6569, "step": 20210 }, { "epoch": 0.7237989507046036, "grad_norm": 1.4298285245895386, "learning_rate": 3.741035402246385e-05, "loss": 1.4772, "step": 20211 }, { "epoch": 0.7238347628341719, "grad_norm": 1.4829427003860474, "learning_rate": 3.74013083018532e-05, "loss": 1.6161, "step": 20212 }, { "epoch": 0.7238705749637402, "grad_norm": 1.5598061084747314, "learning_rate": 3.7392263423427234e-05, "loss": 1.6156, "step": 20213 }, { "epoch": 0.7239063870933085, "grad_norm": 1.6008559465408325, "learning_rate": 3.738321938730758e-05, "loss": 1.3155, "step": 20214 }, { "epoch": 0.7239421992228768, "grad_norm": 1.7069321870803833, "learning_rate": 3.737417619361593e-05, "loss": 1.5145, "step": 20215 }, { "epoch": 0.7239780113524451, "grad_norm": 1.5327214002609253, "learning_rate": 3.7365133842473995e-05, "loss": 1.4894, "step": 20216 }, { "epoch": 0.7240138234820134, "grad_norm": 1.2927590608596802, "learning_rate": 3.735609233400336e-05, "loss": 1.4621, "step": 20217 }, { "epoch": 0.7240496356115816, "grad_norm": 1.4662754535675049, "learning_rate": 3.734705166832569e-05, "loss": 1.1987, "step": 20218 }, { "epoch": 0.7240854477411499, "grad_norm": 1.7217975854873657, "learning_rate": 3.7338011845562624e-05, "loss": 1.4568, "step": 20219 }, { "epoch": 0.7241212598707182, "grad_norm": 1.6077433824539185, "learning_rate": 3.732897286583582e-05, "loss": 1.4954, "step": 20220 }, { "epoch": 0.7241570720002865, "grad_norm": 1.8069626092910767, "learning_rate": 3.7319934729266814e-05, "loss": 1.6723, "step": 20221 }, { "epoch": 0.7241928841298548, "grad_norm": 1.7534021139144897, "learning_rate": 3.731089743597723e-05, "loss": 1.7045, "step": 20222 }, { "epoch": 0.7242286962594231, "grad_norm": 1.8184095621109009, "learning_rate": 3.7301860986088666e-05, "loss": 1.724, "step": 20223 }, { "epoch": 0.7242645083889914, "grad_norm": 2.0938992500305176, "learning_rate": 3.729282537972272e-05, "loss": 1.6204, "step": 20224 }, { "epoch": 0.7243003205185596, "grad_norm": 1.575728178024292, "learning_rate": 3.728379061700091e-05, "loss": 1.3117, "step": 20225 }, { "epoch": 0.7243361326481279, "grad_norm": 1.9121419191360474, "learning_rate": 3.727475669804474e-05, "loss": 1.3417, "step": 20226 }, { "epoch": 0.7243719447776962, "grad_norm": 2.2062628269195557, "learning_rate": 3.726572362297588e-05, "loss": 1.7373, "step": 20227 }, { "epoch": 0.7244077569072644, "grad_norm": 1.6102908849716187, "learning_rate": 3.725669139191574e-05, "loss": 1.3962, "step": 20228 }, { "epoch": 0.7244435690368328, "grad_norm": 1.7527639865875244, "learning_rate": 3.7247660004985897e-05, "loss": 1.5474, "step": 20229 }, { "epoch": 0.7244793811664011, "grad_norm": 2.023170232772827, "learning_rate": 3.723862946230784e-05, "loss": 1.3935, "step": 20230 }, { "epoch": 0.7245151932959694, "grad_norm": 1.7724480628967285, "learning_rate": 3.7229599764003096e-05, "loss": 1.3569, "step": 20231 }, { "epoch": 0.7245510054255376, "grad_norm": 1.76382315158844, "learning_rate": 3.7220570910193096e-05, "loss": 1.5279, "step": 20232 }, { "epoch": 0.7245868175551059, "grad_norm": 1.4322667121887207, "learning_rate": 3.721154290099933e-05, "loss": 1.259, "step": 20233 }, { "epoch": 0.7246226296846742, "grad_norm": 1.2567883729934692, "learning_rate": 3.7202515736543296e-05, "loss": 1.3401, "step": 20234 }, { "epoch": 0.7246584418142424, "grad_norm": 1.4475733041763306, "learning_rate": 3.7193489416946383e-05, "loss": 1.4377, "step": 20235 }, { "epoch": 0.7246942539438108, "grad_norm": 1.2449748516082764, "learning_rate": 3.718446394233007e-05, "loss": 1.5922, "step": 20236 }, { "epoch": 0.7247300660733791, "grad_norm": 1.514506220817566, "learning_rate": 3.717543931281572e-05, "loss": 1.3933, "step": 20237 }, { "epoch": 0.7247658782029474, "grad_norm": 1.5675554275512695, "learning_rate": 3.7166415528524854e-05, "loss": 1.0401, "step": 20238 }, { "epoch": 0.7248016903325156, "grad_norm": 1.6422276496887207, "learning_rate": 3.715739258957879e-05, "loss": 1.411, "step": 20239 }, { "epoch": 0.7248375024620839, "grad_norm": 1.1734343767166138, "learning_rate": 3.714837049609898e-05, "loss": 1.5028, "step": 20240 }, { "epoch": 0.7248733145916522, "grad_norm": 1.268044352531433, "learning_rate": 3.71393492482067e-05, "loss": 1.3508, "step": 20241 }, { "epoch": 0.7249091267212204, "grad_norm": 1.7053765058517456, "learning_rate": 3.713032884602346e-05, "loss": 1.4333, "step": 20242 }, { "epoch": 0.7249449388507888, "grad_norm": 2.118558406829834, "learning_rate": 3.712130928967056e-05, "loss": 1.5188, "step": 20243 }, { "epoch": 0.7249807509803571, "grad_norm": 1.3212299346923828, "learning_rate": 3.711229057926925e-05, "loss": 1.7121, "step": 20244 }, { "epoch": 0.7250165631099253, "grad_norm": 1.4363412857055664, "learning_rate": 3.710327271494103e-05, "loss": 1.4603, "step": 20245 }, { "epoch": 0.7250523752394936, "grad_norm": 2.0643067359924316, "learning_rate": 3.709425569680711e-05, "loss": 1.6963, "step": 20246 }, { "epoch": 0.7250881873690619, "grad_norm": 1.556532621383667, "learning_rate": 3.708523952498887e-05, "loss": 1.3901, "step": 20247 }, { "epoch": 0.7251239994986302, "grad_norm": 1.4042514562606812, "learning_rate": 3.707622419960751e-05, "loss": 1.5076, "step": 20248 }, { "epoch": 0.7251598116281984, "grad_norm": 2.3042328357696533, "learning_rate": 3.7067209720784456e-05, "loss": 1.2477, "step": 20249 }, { "epoch": 0.7251956237577668, "grad_norm": 1.4075384140014648, "learning_rate": 3.705819608864092e-05, "loss": 1.472, "step": 20250 }, { "epoch": 0.7252314358873351, "grad_norm": 1.6488524675369263, "learning_rate": 3.704918330329813e-05, "loss": 1.6962, "step": 20251 }, { "epoch": 0.7252672480169033, "grad_norm": 1.7311025857925415, "learning_rate": 3.704017136487737e-05, "loss": 1.3322, "step": 20252 }, { "epoch": 0.7253030601464716, "grad_norm": 1.5187498331069946, "learning_rate": 3.70311602734999e-05, "loss": 1.6023, "step": 20253 }, { "epoch": 0.7253388722760399, "grad_norm": 1.8480957746505737, "learning_rate": 3.702215002928699e-05, "loss": 1.3335, "step": 20254 }, { "epoch": 0.7253746844056081, "grad_norm": 2.199833393096924, "learning_rate": 3.701314063235972e-05, "loss": 1.33, "step": 20255 }, { "epoch": 0.7254104965351764, "grad_norm": 1.5730723142623901, "learning_rate": 3.7004132082839485e-05, "loss": 1.0013, "step": 20256 }, { "epoch": 0.7254463086647448, "grad_norm": 1.6218773126602173, "learning_rate": 3.699512438084736e-05, "loss": 1.2575, "step": 20257 }, { "epoch": 0.7254821207943131, "grad_norm": 2.079622507095337, "learning_rate": 3.6986117526504595e-05, "loss": 1.5331, "step": 20258 }, { "epoch": 0.7255179329238813, "grad_norm": 1.8061600923538208, "learning_rate": 3.6977111519932295e-05, "loss": 1.3609, "step": 20259 }, { "epoch": 0.7255537450534496, "grad_norm": 2.0372745990753174, "learning_rate": 3.696810636125168e-05, "loss": 1.6058, "step": 20260 }, { "epoch": 0.7255895571830179, "grad_norm": 1.9786975383758545, "learning_rate": 3.69591020505839e-05, "loss": 1.6915, "step": 20261 }, { "epoch": 0.7256253693125861, "grad_norm": 1.4099403619766235, "learning_rate": 3.6950098588050074e-05, "loss": 1.2547, "step": 20262 }, { "epoch": 0.7256611814421544, "grad_norm": 1.5284037590026855, "learning_rate": 3.6941095973771334e-05, "loss": 1.3404, "step": 20263 }, { "epoch": 0.7256969935717228, "grad_norm": 1.6371190547943115, "learning_rate": 3.6932094207868806e-05, "loss": 1.5385, "step": 20264 }, { "epoch": 0.7257328057012911, "grad_norm": 2.2500505447387695, "learning_rate": 3.692309329046364e-05, "loss": 1.6577, "step": 20265 }, { "epoch": 0.7257686178308593, "grad_norm": 1.5898199081420898, "learning_rate": 3.691409322167685e-05, "loss": 1.5561, "step": 20266 }, { "epoch": 0.7258044299604276, "grad_norm": 1.3862446546554565, "learning_rate": 3.690509400162957e-05, "loss": 1.1933, "step": 20267 }, { "epoch": 0.7258402420899959, "grad_norm": 2.0691616535186768, "learning_rate": 3.689609563044288e-05, "loss": 1.6209, "step": 20268 }, { "epoch": 0.7258760542195641, "grad_norm": 1.7785332202911377, "learning_rate": 3.68870981082378e-05, "loss": 1.4089, "step": 20269 }, { "epoch": 0.7259118663491324, "grad_norm": 1.5039234161376953, "learning_rate": 3.687810143513541e-05, "loss": 1.1616, "step": 20270 }, { "epoch": 0.7259476784787008, "grad_norm": 1.4419066905975342, "learning_rate": 3.686910561125675e-05, "loss": 1.4868, "step": 20271 }, { "epoch": 0.725983490608269, "grad_norm": 1.712319254875183, "learning_rate": 3.6860110636722856e-05, "loss": 1.5521, "step": 20272 }, { "epoch": 0.7260193027378373, "grad_norm": 1.749627709388733, "learning_rate": 3.6851116511654705e-05, "loss": 1.3738, "step": 20273 }, { "epoch": 0.7260551148674056, "grad_norm": 1.3292651176452637, "learning_rate": 3.684212323617333e-05, "loss": 1.4005, "step": 20274 }, { "epoch": 0.7260909269969739, "grad_norm": 1.6305632591247559, "learning_rate": 3.683313081039971e-05, "loss": 1.5385, "step": 20275 }, { "epoch": 0.7261267391265421, "grad_norm": 1.43117094039917, "learning_rate": 3.6824139234454876e-05, "loss": 1.5204, "step": 20276 }, { "epoch": 0.7261625512561104, "grad_norm": 1.7682982683181763, "learning_rate": 3.681514850845972e-05, "loss": 1.5069, "step": 20277 }, { "epoch": 0.7261983633856788, "grad_norm": 1.9330202341079712, "learning_rate": 3.6806158632535235e-05, "loss": 1.547, "step": 20278 }, { "epoch": 0.726234175515247, "grad_norm": 1.5854822397232056, "learning_rate": 3.679716960680242e-05, "loss": 1.6898, "step": 20279 }, { "epoch": 0.7262699876448153, "grad_norm": 1.5087810754776, "learning_rate": 3.6788181431382106e-05, "loss": 1.6337, "step": 20280 }, { "epoch": 0.7263057997743836, "grad_norm": 1.8749876022338867, "learning_rate": 3.6779194106395285e-05, "loss": 1.6861, "step": 20281 }, { "epoch": 0.7263416119039519, "grad_norm": 1.8120014667510986, "learning_rate": 3.677020763196286e-05, "loss": 1.673, "step": 20282 }, { "epoch": 0.7263774240335201, "grad_norm": 3.601649522781372, "learning_rate": 3.676122200820577e-05, "loss": 2.1907, "step": 20283 }, { "epoch": 0.7264132361630884, "grad_norm": 1.9699167013168335, "learning_rate": 3.6752237235244825e-05, "loss": 1.4932, "step": 20284 }, { "epoch": 0.7264490482926568, "grad_norm": 1.2282700538635254, "learning_rate": 3.6743253313200945e-05, "loss": 1.2088, "step": 20285 }, { "epoch": 0.726484860422225, "grad_norm": 1.4451675415039062, "learning_rate": 3.673427024219502e-05, "loss": 1.669, "step": 20286 }, { "epoch": 0.7265206725517933, "grad_norm": 1.7398184537887573, "learning_rate": 3.672528802234786e-05, "loss": 1.1963, "step": 20287 }, { "epoch": 0.7265564846813616, "grad_norm": 1.5839215517044067, "learning_rate": 3.671630665378033e-05, "loss": 1.6271, "step": 20288 }, { "epoch": 0.7265922968109298, "grad_norm": 1.601536512374878, "learning_rate": 3.670732613661326e-05, "loss": 1.5271, "step": 20289 }, { "epoch": 0.7266281089404981, "grad_norm": 1.5038881301879883, "learning_rate": 3.669834647096752e-05, "loss": 1.4387, "step": 20290 }, { "epoch": 0.7266639210700664, "grad_norm": 3.174227476119995, "learning_rate": 3.668936765696383e-05, "loss": 1.489, "step": 20291 }, { "epoch": 0.7266997331996348, "grad_norm": 1.7805509567260742, "learning_rate": 3.6680389694723025e-05, "loss": 1.6459, "step": 20292 }, { "epoch": 0.726735545329203, "grad_norm": 2.242903232574463, "learning_rate": 3.667141258436592e-05, "loss": 1.7589, "step": 20293 }, { "epoch": 0.7267713574587713, "grad_norm": 1.5864604711532593, "learning_rate": 3.666243632601329e-05, "loss": 1.6975, "step": 20294 }, { "epoch": 0.7268071695883396, "grad_norm": 1.4965282678604126, "learning_rate": 3.6653460919785855e-05, "loss": 1.4238, "step": 20295 }, { "epoch": 0.7268429817179078, "grad_norm": 1.7679928541183472, "learning_rate": 3.6644486365804385e-05, "loss": 1.2437, "step": 20296 }, { "epoch": 0.7268787938474761, "grad_norm": 1.58811616897583, "learning_rate": 3.663551266418966e-05, "loss": 1.5576, "step": 20297 }, { "epoch": 0.7269146059770444, "grad_norm": 1.8119292259216309, "learning_rate": 3.662653981506235e-05, "loss": 1.4472, "step": 20298 }, { "epoch": 0.7269504181066128, "grad_norm": 2.19586443901062, "learning_rate": 3.661756781854321e-05, "loss": 1.371, "step": 20299 }, { "epoch": 0.726986230236181, "grad_norm": 1.741518259048462, "learning_rate": 3.660859667475293e-05, "loss": 1.3297, "step": 20300 }, { "epoch": 0.7270220423657493, "grad_norm": 3.0605978965759277, "learning_rate": 3.659962638381224e-05, "loss": 1.763, "step": 20301 }, { "epoch": 0.7270578544953176, "grad_norm": 1.5931988954544067, "learning_rate": 3.6590656945841775e-05, "loss": 1.4453, "step": 20302 }, { "epoch": 0.7270936666248858, "grad_norm": 1.3028597831726074, "learning_rate": 3.6581688360962206e-05, "loss": 1.4289, "step": 20303 }, { "epoch": 0.7271294787544541, "grad_norm": 1.5821110010147095, "learning_rate": 3.6572720629294276e-05, "loss": 1.7085, "step": 20304 }, { "epoch": 0.7271652908840224, "grad_norm": 1.6137861013412476, "learning_rate": 3.656375375095853e-05, "loss": 1.2497, "step": 20305 }, { "epoch": 0.7272011030135908, "grad_norm": 1.879036545753479, "learning_rate": 3.655478772607565e-05, "loss": 1.6787, "step": 20306 }, { "epoch": 0.727236915143159, "grad_norm": 1.7845239639282227, "learning_rate": 3.654582255476626e-05, "loss": 1.6337, "step": 20307 }, { "epoch": 0.7272727272727273, "grad_norm": 2.2749297618865967, "learning_rate": 3.6536858237151015e-05, "loss": 1.226, "step": 20308 }, { "epoch": 0.7273085394022956, "grad_norm": 1.7021535634994507, "learning_rate": 3.652789477335045e-05, "loss": 1.4401, "step": 20309 }, { "epoch": 0.7273443515318638, "grad_norm": 2.237609624862671, "learning_rate": 3.651893216348517e-05, "loss": 1.3534, "step": 20310 }, { "epoch": 0.7273801636614321, "grad_norm": 1.8463900089263916, "learning_rate": 3.65099704076758e-05, "loss": 1.2325, "step": 20311 }, { "epoch": 0.7274159757910004, "grad_norm": 1.7602739334106445, "learning_rate": 3.650100950604289e-05, "loss": 1.4707, "step": 20312 }, { "epoch": 0.7274517879205687, "grad_norm": 1.4012271165847778, "learning_rate": 3.649204945870701e-05, "loss": 1.4284, "step": 20313 }, { "epoch": 0.727487600050137, "grad_norm": 1.6311163902282715, "learning_rate": 3.6483090265788614e-05, "loss": 1.2821, "step": 20314 }, { "epoch": 0.7275234121797053, "grad_norm": 1.6619857549667358, "learning_rate": 3.647413192740836e-05, "loss": 1.284, "step": 20315 }, { "epoch": 0.7275592243092736, "grad_norm": 1.6856287717819214, "learning_rate": 3.64651744436867e-05, "loss": 1.4404, "step": 20316 }, { "epoch": 0.7275950364388418, "grad_norm": 2.37723708152771, "learning_rate": 3.6456217814744165e-05, "loss": 1.6775, "step": 20317 }, { "epoch": 0.7276308485684101, "grad_norm": 1.4512906074523926, "learning_rate": 3.644726204070125e-05, "loss": 1.6137, "step": 20318 }, { "epoch": 0.7276666606979784, "grad_norm": 1.3987200260162354, "learning_rate": 3.643830712167847e-05, "loss": 1.5412, "step": 20319 }, { "epoch": 0.7277024728275467, "grad_norm": 1.5745872259140015, "learning_rate": 3.6429353057796255e-05, "loss": 1.3009, "step": 20320 }, { "epoch": 0.727738284957115, "grad_norm": 2.594204902648926, "learning_rate": 3.642039984917509e-05, "loss": 1.5018, "step": 20321 }, { "epoch": 0.7277740970866833, "grad_norm": 1.6258982419967651, "learning_rate": 3.641144749593548e-05, "loss": 1.3628, "step": 20322 }, { "epoch": 0.7278099092162515, "grad_norm": 1.546212911605835, "learning_rate": 3.640249599819777e-05, "loss": 1.53, "step": 20323 }, { "epoch": 0.7278457213458198, "grad_norm": 1.531711220741272, "learning_rate": 3.639354535608248e-05, "loss": 1.4359, "step": 20324 }, { "epoch": 0.7278815334753881, "grad_norm": 1.293178677558899, "learning_rate": 3.638459556970993e-05, "loss": 1.3002, "step": 20325 }, { "epoch": 0.7279173456049564, "grad_norm": 1.8886504173278809, "learning_rate": 3.637564663920066e-05, "loss": 1.5775, "step": 20326 }, { "epoch": 0.7279531577345247, "grad_norm": 1.3030343055725098, "learning_rate": 3.636669856467495e-05, "loss": 1.443, "step": 20327 }, { "epoch": 0.727988969864093, "grad_norm": 1.8650761842727661, "learning_rate": 3.635775134625323e-05, "loss": 1.484, "step": 20328 }, { "epoch": 0.7280247819936613, "grad_norm": 1.1256822347640991, "learning_rate": 3.634880498405587e-05, "loss": 1.3617, "step": 20329 }, { "epoch": 0.7280605941232295, "grad_norm": 1.5693796873092651, "learning_rate": 3.6339859478203274e-05, "loss": 1.2764, "step": 20330 }, { "epoch": 0.7280964062527978, "grad_norm": 2.721557378768921, "learning_rate": 3.6330914828815755e-05, "loss": 1.6122, "step": 20331 }, { "epoch": 0.7281322183823661, "grad_norm": 1.8987950086593628, "learning_rate": 3.632197103601358e-05, "loss": 1.1593, "step": 20332 }, { "epoch": 0.7281680305119343, "grad_norm": 1.3564764261245728, "learning_rate": 3.6313028099917226e-05, "loss": 1.4216, "step": 20333 }, { "epoch": 0.7282038426415027, "grad_norm": 1.3423123359680176, "learning_rate": 3.6304086020646874e-05, "loss": 1.2289, "step": 20334 }, { "epoch": 0.728239654771071, "grad_norm": 2.045650005340576, "learning_rate": 3.629514479832292e-05, "loss": 1.5775, "step": 20335 }, { "epoch": 0.7282754669006393, "grad_norm": 1.7621314525604248, "learning_rate": 3.628620443306556e-05, "loss": 1.4242, "step": 20336 }, { "epoch": 0.7283112790302075, "grad_norm": 1.5578246116638184, "learning_rate": 3.62772649249952e-05, "loss": 1.4387, "step": 20337 }, { "epoch": 0.7283470911597758, "grad_norm": 1.3517426252365112, "learning_rate": 3.626832627423201e-05, "loss": 1.7368, "step": 20338 }, { "epoch": 0.7283829032893441, "grad_norm": 1.414923906326294, "learning_rate": 3.6259388480896316e-05, "loss": 1.4573, "step": 20339 }, { "epoch": 0.7284187154189123, "grad_norm": 1.8400875329971313, "learning_rate": 3.62504515451083e-05, "loss": 1.3403, "step": 20340 }, { "epoch": 0.7284545275484807, "grad_norm": 1.4856648445129395, "learning_rate": 3.624151546698822e-05, "loss": 1.5307, "step": 20341 }, { "epoch": 0.728490339678049, "grad_norm": 1.957085371017456, "learning_rate": 3.623258024665635e-05, "loss": 1.3083, "step": 20342 }, { "epoch": 0.7285261518076173, "grad_norm": 1.5131561756134033, "learning_rate": 3.6223645884232784e-05, "loss": 1.4215, "step": 20343 }, { "epoch": 0.7285619639371855, "grad_norm": 1.8135859966278076, "learning_rate": 3.621471237983787e-05, "loss": 1.5306, "step": 20344 }, { "epoch": 0.7285977760667538, "grad_norm": 2.384350538253784, "learning_rate": 3.620577973359168e-05, "loss": 1.492, "step": 20345 }, { "epoch": 0.7286335881963221, "grad_norm": 1.0702521800994873, "learning_rate": 3.619684794561448e-05, "loss": 0.9597, "step": 20346 }, { "epoch": 0.7286694003258903, "grad_norm": 1.8656784296035767, "learning_rate": 3.618791701602635e-05, "loss": 1.4725, "step": 20347 }, { "epoch": 0.7287052124554587, "grad_norm": 1.8539940118789673, "learning_rate": 3.617898694494749e-05, "loss": 1.6358, "step": 20348 }, { "epoch": 0.728741024585027, "grad_norm": 1.284334659576416, "learning_rate": 3.6170057732498064e-05, "loss": 1.6315, "step": 20349 }, { "epoch": 0.7287768367145953, "grad_norm": 1.698467493057251, "learning_rate": 3.616112937879814e-05, "loss": 1.4993, "step": 20350 }, { "epoch": 0.7288126488441635, "grad_norm": 1.6168347597122192, "learning_rate": 3.6152201883967885e-05, "loss": 1.56, "step": 20351 }, { "epoch": 0.7288484609737318, "grad_norm": 1.26214599609375, "learning_rate": 3.6143275248127394e-05, "loss": 1.4896, "step": 20352 }, { "epoch": 0.7288842731033001, "grad_norm": 1.7499330043792725, "learning_rate": 3.61343494713968e-05, "loss": 1.2003, "step": 20353 }, { "epoch": 0.7289200852328683, "grad_norm": 2.1772499084472656, "learning_rate": 3.612542455389608e-05, "loss": 1.5853, "step": 20354 }, { "epoch": 0.7289558973624367, "grad_norm": 1.5749849081039429, "learning_rate": 3.611650049574545e-05, "loss": 1.5532, "step": 20355 }, { "epoch": 0.728991709492005, "grad_norm": 1.753783106803894, "learning_rate": 3.61075772970649e-05, "loss": 1.4504, "step": 20356 }, { "epoch": 0.7290275216215732, "grad_norm": 2.087217330932617, "learning_rate": 3.609865495797445e-05, "loss": 1.6384, "step": 20357 }, { "epoch": 0.7290633337511415, "grad_norm": 1.6963598728179932, "learning_rate": 3.608973347859418e-05, "loss": 1.388, "step": 20358 }, { "epoch": 0.7290991458807098, "grad_norm": 1.416736364364624, "learning_rate": 3.6080812859044086e-05, "loss": 1.4647, "step": 20359 }, { "epoch": 0.7291349580102781, "grad_norm": 2.7827279567718506, "learning_rate": 3.607189309944427e-05, "loss": 1.3479, "step": 20360 }, { "epoch": 0.7291707701398463, "grad_norm": 1.966021180152893, "learning_rate": 3.6062974199914615e-05, "loss": 1.5521, "step": 20361 }, { "epoch": 0.7292065822694147, "grad_norm": 1.5805641412734985, "learning_rate": 3.6054056160575164e-05, "loss": 1.3172, "step": 20362 }, { "epoch": 0.729242394398983, "grad_norm": 1.9374136924743652, "learning_rate": 3.6045138981545915e-05, "loss": 1.4399, "step": 20363 }, { "epoch": 0.7292782065285512, "grad_norm": 2.0594398975372314, "learning_rate": 3.603622266294686e-05, "loss": 1.1237, "step": 20364 }, { "epoch": 0.7293140186581195, "grad_norm": 1.5763599872589111, "learning_rate": 3.6027307204897886e-05, "loss": 1.3851, "step": 20365 }, { "epoch": 0.7293498307876878, "grad_norm": 1.818981409072876, "learning_rate": 3.601839260751897e-05, "loss": 1.3178, "step": 20366 }, { "epoch": 0.729385642917256, "grad_norm": 1.4879289865493774, "learning_rate": 3.600947887093009e-05, "loss": 1.3811, "step": 20367 }, { "epoch": 0.7294214550468243, "grad_norm": 1.7002722024917603, "learning_rate": 3.600056599525109e-05, "loss": 1.3244, "step": 20368 }, { "epoch": 0.7294572671763927, "grad_norm": 1.7715823650360107, "learning_rate": 3.5991653980601926e-05, "loss": 1.2399, "step": 20369 }, { "epoch": 0.729493079305961, "grad_norm": 2.60311222076416, "learning_rate": 3.59827428271025e-05, "loss": 1.6152, "step": 20370 }, { "epoch": 0.7295288914355292, "grad_norm": 1.446850061416626, "learning_rate": 3.597383253487272e-05, "loss": 1.3049, "step": 20371 }, { "epoch": 0.7295647035650975, "grad_norm": 1.3441686630249023, "learning_rate": 3.59649231040324e-05, "loss": 1.4572, "step": 20372 }, { "epoch": 0.7296005156946658, "grad_norm": 1.666536569595337, "learning_rate": 3.595601453470143e-05, "loss": 1.5172, "step": 20373 }, { "epoch": 0.729636327824234, "grad_norm": 1.4410146474838257, "learning_rate": 3.594710682699972e-05, "loss": 1.6782, "step": 20374 }, { "epoch": 0.7296721399538023, "grad_norm": 1.8410130739212036, "learning_rate": 3.5938199981047036e-05, "loss": 1.5535, "step": 20375 }, { "epoch": 0.7297079520833707, "grad_norm": 1.9997683763504028, "learning_rate": 3.592929399696323e-05, "loss": 1.3053, "step": 20376 }, { "epoch": 0.729743764212939, "grad_norm": 1.5407726764678955, "learning_rate": 3.592038887486813e-05, "loss": 1.4949, "step": 20377 }, { "epoch": 0.7297795763425072, "grad_norm": 2.03817081451416, "learning_rate": 3.591148461488157e-05, "loss": 1.5091, "step": 20378 }, { "epoch": 0.7298153884720755, "grad_norm": 1.7712717056274414, "learning_rate": 3.590258121712329e-05, "loss": 1.8162, "step": 20379 }, { "epoch": 0.7298512006016438, "grad_norm": 1.590785264968872, "learning_rate": 3.589367868171309e-05, "loss": 1.2453, "step": 20380 }, { "epoch": 0.729887012731212, "grad_norm": 1.64919114112854, "learning_rate": 3.5884777008770765e-05, "loss": 1.5673, "step": 20381 }, { "epoch": 0.7299228248607803, "grad_norm": 1.4542080163955688, "learning_rate": 3.587587619841609e-05, "loss": 0.9534, "step": 20382 }, { "epoch": 0.7299586369903487, "grad_norm": 1.3254774808883667, "learning_rate": 3.586697625076876e-05, "loss": 1.4704, "step": 20383 }, { "epoch": 0.729994449119917, "grad_norm": 1.9751310348510742, "learning_rate": 3.585807716594853e-05, "loss": 1.4692, "step": 20384 }, { "epoch": 0.7300302612494852, "grad_norm": 1.78018057346344, "learning_rate": 3.584917894407517e-05, "loss": 1.4879, "step": 20385 }, { "epoch": 0.7300660733790535, "grad_norm": 1.4248936176300049, "learning_rate": 3.584028158526832e-05, "loss": 1.4147, "step": 20386 }, { "epoch": 0.7301018855086218, "grad_norm": 2.423626184463501, "learning_rate": 3.583138508964773e-05, "loss": 1.6582, "step": 20387 }, { "epoch": 0.73013769763819, "grad_norm": 1.622358798980713, "learning_rate": 3.582248945733307e-05, "loss": 1.6244, "step": 20388 }, { "epoch": 0.7301735097677583, "grad_norm": 2.3228282928466797, "learning_rate": 3.581359468844408e-05, "loss": 1.8354, "step": 20389 }, { "epoch": 0.7302093218973267, "grad_norm": 1.6981981992721558, "learning_rate": 3.580470078310034e-05, "loss": 1.3045, "step": 20390 }, { "epoch": 0.730245134026895, "grad_norm": 2.001455068588257, "learning_rate": 3.579580774142155e-05, "loss": 1.2112, "step": 20391 }, { "epoch": 0.7302809461564632, "grad_norm": 2.1798324584960938, "learning_rate": 3.5786915563527376e-05, "loss": 1.155, "step": 20392 }, { "epoch": 0.7303167582860315, "grad_norm": 1.86077880859375, "learning_rate": 3.577802424953739e-05, "loss": 1.7546, "step": 20393 }, { "epoch": 0.7303525704155998, "grad_norm": 2.3468453884124756, "learning_rate": 3.576913379957125e-05, "loss": 1.22, "step": 20394 }, { "epoch": 0.730388382545168, "grad_norm": 1.3611379861831665, "learning_rate": 3.5760244213748565e-05, "loss": 1.3976, "step": 20395 }, { "epoch": 0.7304241946747363, "grad_norm": 1.5339411497116089, "learning_rate": 3.575135549218895e-05, "loss": 1.5949, "step": 20396 }, { "epoch": 0.7304600068043047, "grad_norm": 2.1556475162506104, "learning_rate": 3.5742467635011956e-05, "loss": 1.4444, "step": 20397 }, { "epoch": 0.7304958189338729, "grad_norm": 1.9611434936523438, "learning_rate": 3.5733580642337174e-05, "loss": 1.4917, "step": 20398 }, { "epoch": 0.7305316310634412, "grad_norm": 1.5778285264968872, "learning_rate": 3.572469451428415e-05, "loss": 1.274, "step": 20399 }, { "epoch": 0.7305674431930095, "grad_norm": 1.7948662042617798, "learning_rate": 3.57158092509725e-05, "loss": 1.245, "step": 20400 }, { "epoch": 0.7306032553225777, "grad_norm": 1.58558988571167, "learning_rate": 3.5706924852521674e-05, "loss": 1.4525, "step": 20401 }, { "epoch": 0.730639067452146, "grad_norm": 2.173316478729248, "learning_rate": 3.5698041319051245e-05, "loss": 1.7764, "step": 20402 }, { "epoch": 0.7306748795817143, "grad_norm": 1.275415062904358, "learning_rate": 3.5689158650680765e-05, "loss": 1.5483, "step": 20403 }, { "epoch": 0.7307106917112826, "grad_norm": 1.8517347574234009, "learning_rate": 3.568027684752966e-05, "loss": 1.6182, "step": 20404 }, { "epoch": 0.7307465038408509, "grad_norm": 1.9856765270233154, "learning_rate": 3.5671395909717477e-05, "loss": 1.3489, "step": 20405 }, { "epoch": 0.7307823159704192, "grad_norm": 1.7788243293762207, "learning_rate": 3.566251583736367e-05, "loss": 1.5068, "step": 20406 }, { "epoch": 0.7308181280999875, "grad_norm": 1.406541109085083, "learning_rate": 3.5653636630587764e-05, "loss": 1.3148, "step": 20407 }, { "epoch": 0.7308539402295557, "grad_norm": 1.4304836988449097, "learning_rate": 3.5644758289509126e-05, "loss": 1.1475, "step": 20408 }, { "epoch": 0.730889752359124, "grad_norm": 1.7873742580413818, "learning_rate": 3.563588081424727e-05, "loss": 1.363, "step": 20409 }, { "epoch": 0.7309255644886923, "grad_norm": 1.6076723337173462, "learning_rate": 3.5627004204921645e-05, "loss": 1.7126, "step": 20410 }, { "epoch": 0.7309613766182605, "grad_norm": 1.798566460609436, "learning_rate": 3.561812846165161e-05, "loss": 1.4759, "step": 20411 }, { "epoch": 0.7309971887478289, "grad_norm": 2.0032665729522705, "learning_rate": 3.56092535845566e-05, "loss": 1.3545, "step": 20412 }, { "epoch": 0.7310330008773972, "grad_norm": 1.4603948593139648, "learning_rate": 3.560037957375604e-05, "loss": 1.3149, "step": 20413 }, { "epoch": 0.7310688130069655, "grad_norm": 1.8404431343078613, "learning_rate": 3.5591506429369325e-05, "loss": 1.7072, "step": 20414 }, { "epoch": 0.7311046251365337, "grad_norm": 1.4005978107452393, "learning_rate": 3.558263415151578e-05, "loss": 1.4017, "step": 20415 }, { "epoch": 0.731140437266102, "grad_norm": 1.3122055530548096, "learning_rate": 3.557376274031481e-05, "loss": 1.3193, "step": 20416 }, { "epoch": 0.7311762493956703, "grad_norm": 1.732801914215088, "learning_rate": 3.556489219588575e-05, "loss": 1.8159, "step": 20417 }, { "epoch": 0.7312120615252385, "grad_norm": 1.7822617292404175, "learning_rate": 3.5556022518347975e-05, "loss": 1.529, "step": 20418 }, { "epoch": 0.7312478736548069, "grad_norm": 1.499024510383606, "learning_rate": 3.55471537078208e-05, "loss": 1.2738, "step": 20419 }, { "epoch": 0.7312836857843752, "grad_norm": 3.1258182525634766, "learning_rate": 3.553828576442346e-05, "loss": 2.0017, "step": 20420 }, { "epoch": 0.7313194979139435, "grad_norm": 1.561532974243164, "learning_rate": 3.552941868827542e-05, "loss": 1.5181, "step": 20421 }, { "epoch": 0.7313553100435117, "grad_norm": 1.2803188562393188, "learning_rate": 3.552055247949584e-05, "loss": 1.5402, "step": 20422 }, { "epoch": 0.73139112217308, "grad_norm": 2.332247734069824, "learning_rate": 3.5511687138204097e-05, "loss": 1.2652, "step": 20423 }, { "epoch": 0.7314269343026483, "grad_norm": 1.3746566772460938, "learning_rate": 3.5502822664519345e-05, "loss": 1.5465, "step": 20424 }, { "epoch": 0.7314627464322165, "grad_norm": 1.6845834255218506, "learning_rate": 3.549395905856099e-05, "loss": 1.3739, "step": 20425 }, { "epoch": 0.7314985585617849, "grad_norm": 1.6807371377944946, "learning_rate": 3.5485096320448176e-05, "loss": 1.6331, "step": 20426 }, { "epoch": 0.7315343706913532, "grad_norm": 1.6530333757400513, "learning_rate": 3.547623445030016e-05, "loss": 1.2784, "step": 20427 }, { "epoch": 0.7315701828209215, "grad_norm": 1.698762059211731, "learning_rate": 3.546737344823623e-05, "loss": 1.4305, "step": 20428 }, { "epoch": 0.7316059949504897, "grad_norm": 2.0598812103271484, "learning_rate": 3.545851331437551e-05, "loss": 1.3792, "step": 20429 }, { "epoch": 0.731641807080058, "grad_norm": 1.8785374164581299, "learning_rate": 3.544965404883728e-05, "loss": 1.1282, "step": 20430 }, { "epoch": 0.7316776192096263, "grad_norm": 2.2922866344451904, "learning_rate": 3.544079565174061e-05, "loss": 1.4628, "step": 20431 }, { "epoch": 0.7317134313391945, "grad_norm": 2.218919277191162, "learning_rate": 3.543193812320483e-05, "loss": 1.3955, "step": 20432 }, { "epoch": 0.7317492434687629, "grad_norm": 1.631608009338379, "learning_rate": 3.542308146334901e-05, "loss": 1.6856, "step": 20433 }, { "epoch": 0.7317850555983312, "grad_norm": 1.6857116222381592, "learning_rate": 3.541422567229235e-05, "loss": 1.4069, "step": 20434 }, { "epoch": 0.7318208677278994, "grad_norm": 1.6167864799499512, "learning_rate": 3.540537075015393e-05, "loss": 1.6345, "step": 20435 }, { "epoch": 0.7318566798574677, "grad_norm": 1.622429609298706, "learning_rate": 3.539651669705297e-05, "loss": 1.3154, "step": 20436 }, { "epoch": 0.731892491987036, "grad_norm": 1.8493098020553589, "learning_rate": 3.538766351310856e-05, "loss": 1.664, "step": 20437 }, { "epoch": 0.7319283041166043, "grad_norm": 2.0410239696502686, "learning_rate": 3.537881119843972e-05, "loss": 1.0608, "step": 20438 }, { "epoch": 0.7319641162461725, "grad_norm": 1.2448198795318604, "learning_rate": 3.5369959753165694e-05, "loss": 1.3688, "step": 20439 }, { "epoch": 0.7319999283757409, "grad_norm": 1.456103801727295, "learning_rate": 3.536110917740545e-05, "loss": 1.4517, "step": 20440 }, { "epoch": 0.7320357405053092, "grad_norm": 1.7088440656661987, "learning_rate": 3.5352259471278146e-05, "loss": 1.3145, "step": 20441 }, { "epoch": 0.7320715526348774, "grad_norm": 1.8974305391311646, "learning_rate": 3.534341063490273e-05, "loss": 1.3612, "step": 20442 }, { "epoch": 0.7321073647644457, "grad_norm": 1.5721908807754517, "learning_rate": 3.533456266839838e-05, "loss": 1.4555, "step": 20443 }, { "epoch": 0.732143176894014, "grad_norm": 1.6463236808776855, "learning_rate": 3.532571557188409e-05, "loss": 1.4655, "step": 20444 }, { "epoch": 0.7321789890235822, "grad_norm": 2.0902090072631836, "learning_rate": 3.531686934547884e-05, "loss": 1.2894, "step": 20445 }, { "epoch": 0.7322148011531505, "grad_norm": 1.4955464601516724, "learning_rate": 3.5308023989301676e-05, "loss": 1.286, "step": 20446 }, { "epoch": 0.7322506132827189, "grad_norm": 2.225278854370117, "learning_rate": 3.529917950347159e-05, "loss": 1.6805, "step": 20447 }, { "epoch": 0.7322864254122872, "grad_norm": 1.585610270500183, "learning_rate": 3.529033588810764e-05, "loss": 1.1171, "step": 20448 }, { "epoch": 0.7323222375418554, "grad_norm": 1.6811686754226685, "learning_rate": 3.52814931433287e-05, "loss": 1.5252, "step": 20449 }, { "epoch": 0.7323580496714237, "grad_norm": 2.0592916011810303, "learning_rate": 3.52726512692538e-05, "loss": 1.6063, "step": 20450 }, { "epoch": 0.732393861800992, "grad_norm": 1.6226791143417358, "learning_rate": 3.526381026600188e-05, "loss": 1.3123, "step": 20451 }, { "epoch": 0.7324296739305602, "grad_norm": 1.4268947839736938, "learning_rate": 3.5254970133691925e-05, "loss": 1.3222, "step": 20452 }, { "epoch": 0.7324654860601285, "grad_norm": 1.6242847442626953, "learning_rate": 3.5246130872442794e-05, "loss": 1.3596, "step": 20453 }, { "epoch": 0.7325012981896969, "grad_norm": 1.6318475008010864, "learning_rate": 3.523729248237345e-05, "loss": 1.2142, "step": 20454 }, { "epoch": 0.7325371103192652, "grad_norm": 1.932065486907959, "learning_rate": 3.522845496360283e-05, "loss": 1.6373, "step": 20455 }, { "epoch": 0.7325729224488334, "grad_norm": 1.579846978187561, "learning_rate": 3.5219618316249766e-05, "loss": 1.5628, "step": 20456 }, { "epoch": 0.7326087345784017, "grad_norm": 1.3308910131454468, "learning_rate": 3.521078254043317e-05, "loss": 1.6539, "step": 20457 }, { "epoch": 0.73264454670797, "grad_norm": 1.5070046186447144, "learning_rate": 3.5201947636271934e-05, "loss": 1.1408, "step": 20458 }, { "epoch": 0.7326803588375382, "grad_norm": 2.3446309566497803, "learning_rate": 3.519311360388494e-05, "loss": 1.5686, "step": 20459 }, { "epoch": 0.7327161709671065, "grad_norm": 1.3851144313812256, "learning_rate": 3.518428044339097e-05, "loss": 1.4275, "step": 20460 }, { "epoch": 0.7327519830966749, "grad_norm": 1.6674261093139648, "learning_rate": 3.5175448154908895e-05, "loss": 1.459, "step": 20461 }, { "epoch": 0.7327877952262432, "grad_norm": 1.6122231483459473, "learning_rate": 3.516661673855759e-05, "loss": 1.3823, "step": 20462 }, { "epoch": 0.7328236073558114, "grad_norm": 1.8272632360458374, "learning_rate": 3.51577861944558e-05, "loss": 1.5136, "step": 20463 }, { "epoch": 0.7328594194853797, "grad_norm": 2.42698335647583, "learning_rate": 3.5148956522722346e-05, "loss": 1.2525, "step": 20464 }, { "epoch": 0.732895231614948, "grad_norm": 1.7631263732910156, "learning_rate": 3.5140127723476034e-05, "loss": 1.351, "step": 20465 }, { "epoch": 0.7329310437445162, "grad_norm": 1.357838749885559, "learning_rate": 3.513129979683567e-05, "loss": 1.1883, "step": 20466 }, { "epoch": 0.7329668558740845, "grad_norm": 1.483927845954895, "learning_rate": 3.5122472742919965e-05, "loss": 1.3296, "step": 20467 }, { "epoch": 0.7330026680036529, "grad_norm": 2.0464913845062256, "learning_rate": 3.51136465618477e-05, "loss": 1.4515, "step": 20468 }, { "epoch": 0.7330384801332211, "grad_norm": 1.5297338962554932, "learning_rate": 3.510482125373762e-05, "loss": 1.1064, "step": 20469 }, { "epoch": 0.7330742922627894, "grad_norm": 1.6181906461715698, "learning_rate": 3.50959968187085e-05, "loss": 1.2575, "step": 20470 }, { "epoch": 0.7331101043923577, "grad_norm": 1.5216774940490723, "learning_rate": 3.508717325687898e-05, "loss": 1.234, "step": 20471 }, { "epoch": 0.733145916521926, "grad_norm": 2.048452615737915, "learning_rate": 3.5078350568367825e-05, "loss": 1.4341, "step": 20472 }, { "epoch": 0.7331817286514942, "grad_norm": 1.6940174102783203, "learning_rate": 3.5069528753293746e-05, "loss": 1.5693, "step": 20473 }, { "epoch": 0.7332175407810625, "grad_norm": 1.9572473764419556, "learning_rate": 3.506070781177537e-05, "loss": 1.419, "step": 20474 }, { "epoch": 0.7332533529106309, "grad_norm": 1.8093883991241455, "learning_rate": 3.505188774393141e-05, "loss": 1.4015, "step": 20475 }, { "epoch": 0.7332891650401991, "grad_norm": 1.1968109607696533, "learning_rate": 3.504306854988052e-05, "loss": 1.3426, "step": 20476 }, { "epoch": 0.7333249771697674, "grad_norm": 1.6046714782714844, "learning_rate": 3.5034250229741384e-05, "loss": 1.5721, "step": 20477 }, { "epoch": 0.7333607892993357, "grad_norm": 2.118436813354492, "learning_rate": 3.5025432783632585e-05, "loss": 1.56, "step": 20478 }, { "epoch": 0.733396601428904, "grad_norm": 2.097813129425049, "learning_rate": 3.501661621167277e-05, "loss": 1.2924, "step": 20479 }, { "epoch": 0.7334324135584722, "grad_norm": 1.8150711059570312, "learning_rate": 3.50078005139806e-05, "loss": 1.5607, "step": 20480 }, { "epoch": 0.7334682256880405, "grad_norm": 2.765345335006714, "learning_rate": 3.49989856906746e-05, "loss": 1.6204, "step": 20481 }, { "epoch": 0.7335040378176089, "grad_norm": 2.4202771186828613, "learning_rate": 3.499017174187341e-05, "loss": 1.797, "step": 20482 }, { "epoch": 0.7335398499471771, "grad_norm": 1.5121333599090576, "learning_rate": 3.498135866769561e-05, "loss": 1.4781, "step": 20483 }, { "epoch": 0.7335756620767454, "grad_norm": 1.4281476736068726, "learning_rate": 3.497254646825978e-05, "loss": 1.3851, "step": 20484 }, { "epoch": 0.7336114742063137, "grad_norm": 1.7604395151138306, "learning_rate": 3.496373514368443e-05, "loss": 1.3445, "step": 20485 }, { "epoch": 0.7336472863358819, "grad_norm": 1.9605915546417236, "learning_rate": 3.495492469408813e-05, "loss": 1.0695, "step": 20486 }, { "epoch": 0.7336830984654502, "grad_norm": 1.3502087593078613, "learning_rate": 3.494611511958942e-05, "loss": 1.2486, "step": 20487 }, { "epoch": 0.7337189105950185, "grad_norm": 2.571399688720703, "learning_rate": 3.493730642030685e-05, "loss": 1.5465, "step": 20488 }, { "epoch": 0.7337547227245869, "grad_norm": 1.424328327178955, "learning_rate": 3.492849859635885e-05, "loss": 1.2137, "step": 20489 }, { "epoch": 0.7337905348541551, "grad_norm": 1.9703563451766968, "learning_rate": 3.4919691647863984e-05, "loss": 1.3256, "step": 20490 }, { "epoch": 0.7338263469837234, "grad_norm": 1.6080145835876465, "learning_rate": 3.491088557494074e-05, "loss": 1.4436, "step": 20491 }, { "epoch": 0.7338621591132917, "grad_norm": 1.7678338289260864, "learning_rate": 3.490208037770755e-05, "loss": 1.6468, "step": 20492 }, { "epoch": 0.7338979712428599, "grad_norm": 1.807364583015442, "learning_rate": 3.4893276056282894e-05, "loss": 1.4296, "step": 20493 }, { "epoch": 0.7339337833724282, "grad_norm": 1.544736623764038, "learning_rate": 3.4884472610785224e-05, "loss": 1.3378, "step": 20494 }, { "epoch": 0.7339695955019965, "grad_norm": 1.670017123222351, "learning_rate": 3.487567004133302e-05, "loss": 1.5572, "step": 20495 }, { "epoch": 0.7340054076315649, "grad_norm": 1.2803508043289185, "learning_rate": 3.4866868348044634e-05, "loss": 1.5029, "step": 20496 }, { "epoch": 0.7340412197611331, "grad_norm": 1.7097656726837158, "learning_rate": 3.485806753103852e-05, "loss": 1.4671, "step": 20497 }, { "epoch": 0.7340770318907014, "grad_norm": 2.6056883335113525, "learning_rate": 3.484926759043311e-05, "loss": 1.5693, "step": 20498 }, { "epoch": 0.7341128440202697, "grad_norm": 2.137596607208252, "learning_rate": 3.484046852634674e-05, "loss": 1.4641, "step": 20499 }, { "epoch": 0.7341486561498379, "grad_norm": 1.4234815835952759, "learning_rate": 3.483167033889781e-05, "loss": 1.4199, "step": 20500 }, { "epoch": 0.7341844682794062, "grad_norm": 1.8504927158355713, "learning_rate": 3.4822873028204694e-05, "loss": 1.445, "step": 20501 }, { "epoch": 0.7342202804089745, "grad_norm": 1.4451167583465576, "learning_rate": 3.481407659438579e-05, "loss": 1.3129, "step": 20502 }, { "epoch": 0.7342560925385428, "grad_norm": 1.8816039562225342, "learning_rate": 3.480528103755937e-05, "loss": 1.5152, "step": 20503 }, { "epoch": 0.7342919046681111, "grad_norm": 1.3867018222808838, "learning_rate": 3.479648635784378e-05, "loss": 1.3284, "step": 20504 }, { "epoch": 0.7343277167976794, "grad_norm": 2.107089042663574, "learning_rate": 3.478769255535738e-05, "loss": 1.5423, "step": 20505 }, { "epoch": 0.7343635289272477, "grad_norm": 1.6862614154815674, "learning_rate": 3.4778899630218483e-05, "loss": 1.618, "step": 20506 }, { "epoch": 0.7343993410568159, "grad_norm": 2.2158303260803223, "learning_rate": 3.4770107582545365e-05, "loss": 1.2988, "step": 20507 }, { "epoch": 0.7344351531863842, "grad_norm": 1.4314731359481812, "learning_rate": 3.4761316412456235e-05, "loss": 1.4692, "step": 20508 }, { "epoch": 0.7344709653159525, "grad_norm": 1.4693208932876587, "learning_rate": 3.4752526120069516e-05, "loss": 1.2102, "step": 20509 }, { "epoch": 0.7345067774455208, "grad_norm": 1.7098275423049927, "learning_rate": 3.474373670550336e-05, "loss": 1.4572, "step": 20510 }, { "epoch": 0.7345425895750891, "grad_norm": 1.5971198081970215, "learning_rate": 3.4734948168876045e-05, "loss": 1.6368, "step": 20511 }, { "epoch": 0.7345784017046574, "grad_norm": 2.2307322025299072, "learning_rate": 3.4726160510305824e-05, "loss": 1.5739, "step": 20512 }, { "epoch": 0.7346142138342256, "grad_norm": 1.6279468536376953, "learning_rate": 3.471737372991095e-05, "loss": 1.7299, "step": 20513 }, { "epoch": 0.7346500259637939, "grad_norm": 2.171621084213257, "learning_rate": 3.470858782780957e-05, "loss": 1.7927, "step": 20514 }, { "epoch": 0.7346858380933622, "grad_norm": 1.6976094245910645, "learning_rate": 3.469980280411992e-05, "loss": 1.2166, "step": 20515 }, { "epoch": 0.7347216502229305, "grad_norm": 1.5864616632461548, "learning_rate": 3.469101865896023e-05, "loss": 1.4572, "step": 20516 }, { "epoch": 0.7347574623524988, "grad_norm": 1.2989685535430908, "learning_rate": 3.468223539244859e-05, "loss": 1.2894, "step": 20517 }, { "epoch": 0.7347932744820671, "grad_norm": 1.3882758617401123, "learning_rate": 3.467345300470327e-05, "loss": 1.3966, "step": 20518 }, { "epoch": 0.7348290866116354, "grad_norm": 1.4743343591690063, "learning_rate": 3.466467149584231e-05, "loss": 1.7203, "step": 20519 }, { "epoch": 0.7348648987412036, "grad_norm": 2.1220481395721436, "learning_rate": 3.4655890865983975e-05, "loss": 1.2081, "step": 20520 }, { "epoch": 0.7349007108707719, "grad_norm": 1.9602645635604858, "learning_rate": 3.464711111524631e-05, "loss": 1.5757, "step": 20521 }, { "epoch": 0.7349365230003402, "grad_norm": 1.7136316299438477, "learning_rate": 3.4638332243747464e-05, "loss": 1.1922, "step": 20522 }, { "epoch": 0.7349723351299084, "grad_norm": 1.697861671447754, "learning_rate": 3.4629554251605545e-05, "loss": 1.5317, "step": 20523 }, { "epoch": 0.7350081472594768, "grad_norm": 1.6139419078826904, "learning_rate": 3.4620777138938695e-05, "loss": 1.5648, "step": 20524 }, { "epoch": 0.7350439593890451, "grad_norm": 1.9879515171051025, "learning_rate": 3.461200090586495e-05, "loss": 1.5372, "step": 20525 }, { "epoch": 0.7350797715186134, "grad_norm": 1.6399869918823242, "learning_rate": 3.4603225552502315e-05, "loss": 1.3933, "step": 20526 }, { "epoch": 0.7351155836481816, "grad_norm": 1.606967568397522, "learning_rate": 3.4594451078969005e-05, "loss": 1.2946, "step": 20527 }, { "epoch": 0.7351513957777499, "grad_norm": 1.6109073162078857, "learning_rate": 3.458567748538295e-05, "loss": 1.5296, "step": 20528 }, { "epoch": 0.7351872079073182, "grad_norm": 1.4543050527572632, "learning_rate": 3.457690477186225e-05, "loss": 1.4994, "step": 20529 }, { "epoch": 0.7352230200368864, "grad_norm": 1.369817852973938, "learning_rate": 3.4568132938524845e-05, "loss": 1.2237, "step": 20530 }, { "epoch": 0.7352588321664548, "grad_norm": 1.4970976114273071, "learning_rate": 3.455936198548888e-05, "loss": 1.4621, "step": 20531 }, { "epoch": 0.7352946442960231, "grad_norm": 1.6439193487167358, "learning_rate": 3.455059191287225e-05, "loss": 1.5679, "step": 20532 }, { "epoch": 0.7353304564255914, "grad_norm": 1.7179007530212402, "learning_rate": 3.454182272079303e-05, "loss": 1.494, "step": 20533 }, { "epoch": 0.7353662685551596, "grad_norm": 1.4830936193466187, "learning_rate": 3.45330544093691e-05, "loss": 1.4933, "step": 20534 }, { "epoch": 0.7354020806847279, "grad_norm": 1.8476966619491577, "learning_rate": 3.4524286978718475e-05, "loss": 1.3555, "step": 20535 }, { "epoch": 0.7354378928142962, "grad_norm": 1.3938565254211426, "learning_rate": 3.451552042895916e-05, "loss": 1.3831, "step": 20536 }, { "epoch": 0.7354737049438644, "grad_norm": 1.6916418075561523, "learning_rate": 3.450675476020897e-05, "loss": 1.3601, "step": 20537 }, { "epoch": 0.7355095170734328, "grad_norm": 2.049785614013672, "learning_rate": 3.449798997258599e-05, "loss": 1.4742, "step": 20538 }, { "epoch": 0.7355453292030011, "grad_norm": 1.3984603881835938, "learning_rate": 3.4489226066208025e-05, "loss": 1.2174, "step": 20539 }, { "epoch": 0.7355811413325694, "grad_norm": 1.4211353063583374, "learning_rate": 3.448046304119306e-05, "loss": 1.4096, "step": 20540 }, { "epoch": 0.7356169534621376, "grad_norm": 1.744436502456665, "learning_rate": 3.44717008976589e-05, "loss": 1.4444, "step": 20541 }, { "epoch": 0.7356527655917059, "grad_norm": 1.7035998106002808, "learning_rate": 3.446293963572349e-05, "loss": 1.3967, "step": 20542 }, { "epoch": 0.7356885777212742, "grad_norm": 1.3411145210266113, "learning_rate": 3.4454179255504726e-05, "loss": 1.2792, "step": 20543 }, { "epoch": 0.7357243898508424, "grad_norm": 1.5670628547668457, "learning_rate": 3.44454197571204e-05, "loss": 1.4644, "step": 20544 }, { "epoch": 0.7357602019804108, "grad_norm": 1.505676031112671, "learning_rate": 3.4436661140688386e-05, "loss": 1.36, "step": 20545 }, { "epoch": 0.7357960141099791, "grad_norm": 1.3100101947784424, "learning_rate": 3.442790340632652e-05, "loss": 1.2666, "step": 20546 }, { "epoch": 0.7358318262395473, "grad_norm": 1.3918676376342773, "learning_rate": 3.441914655415268e-05, "loss": 1.5261, "step": 20547 }, { "epoch": 0.7358676383691156, "grad_norm": 1.698038935661316, "learning_rate": 3.441039058428456e-05, "loss": 1.4861, "step": 20548 }, { "epoch": 0.7359034504986839, "grad_norm": 1.634504795074463, "learning_rate": 3.440163549684009e-05, "loss": 1.447, "step": 20549 }, { "epoch": 0.7359392626282522, "grad_norm": 1.5148628950119019, "learning_rate": 3.4392881291936995e-05, "loss": 1.4647, "step": 20550 }, { "epoch": 0.7359750747578204, "grad_norm": 1.5409778356552124, "learning_rate": 3.438412796969304e-05, "loss": 1.6875, "step": 20551 }, { "epoch": 0.7360108868873888, "grad_norm": 1.373879075050354, "learning_rate": 3.4375375530225984e-05, "loss": 1.3546, "step": 20552 }, { "epoch": 0.7360466990169571, "grad_norm": 1.4863821268081665, "learning_rate": 3.436662397365361e-05, "loss": 1.6509, "step": 20553 }, { "epoch": 0.7360825111465253, "grad_norm": 2.397731065750122, "learning_rate": 3.435787330009369e-05, "loss": 1.1656, "step": 20554 }, { "epoch": 0.7361183232760936, "grad_norm": 1.484554409980774, "learning_rate": 3.4349123509663874e-05, "loss": 1.5836, "step": 20555 }, { "epoch": 0.7361541354056619, "grad_norm": 1.328442096710205, "learning_rate": 3.434037460248191e-05, "loss": 1.4112, "step": 20556 }, { "epoch": 0.7361899475352301, "grad_norm": 1.321702480316162, "learning_rate": 3.433162657866552e-05, "loss": 1.3672, "step": 20557 }, { "epoch": 0.7362257596647984, "grad_norm": 2.4087703227996826, "learning_rate": 3.4322879438332414e-05, "loss": 1.1557, "step": 20558 }, { "epoch": 0.7362615717943668, "grad_norm": 1.7339824438095093, "learning_rate": 3.431413318160022e-05, "loss": 1.1772, "step": 20559 }, { "epoch": 0.7362973839239351, "grad_norm": 1.9930311441421509, "learning_rate": 3.430538780858663e-05, "loss": 1.5279, "step": 20560 }, { "epoch": 0.7363331960535033, "grad_norm": 1.6439847946166992, "learning_rate": 3.429664331940935e-05, "loss": 1.3942, "step": 20561 }, { "epoch": 0.7363690081830716, "grad_norm": 1.385252594947815, "learning_rate": 3.4287899714185944e-05, "loss": 1.2338, "step": 20562 }, { "epoch": 0.7364048203126399, "grad_norm": 1.9350563287734985, "learning_rate": 3.427915699303408e-05, "loss": 1.143, "step": 20563 }, { "epoch": 0.7364406324422081, "grad_norm": 1.605703353881836, "learning_rate": 3.427041515607139e-05, "loss": 1.2759, "step": 20564 }, { "epoch": 0.7364764445717764, "grad_norm": 1.4933596849441528, "learning_rate": 3.426167420341552e-05, "loss": 1.2269, "step": 20565 }, { "epoch": 0.7365122567013448, "grad_norm": 1.7159199714660645, "learning_rate": 3.4252934135183977e-05, "loss": 1.2173, "step": 20566 }, { "epoch": 0.7365480688309131, "grad_norm": 1.7331993579864502, "learning_rate": 3.4244194951494414e-05, "loss": 1.4148, "step": 20567 }, { "epoch": 0.7365838809604813, "grad_norm": 1.2863825559616089, "learning_rate": 3.4235456652464405e-05, "loss": 1.3893, "step": 20568 }, { "epoch": 0.7366196930900496, "grad_norm": 1.5917906761169434, "learning_rate": 3.422671923821148e-05, "loss": 1.2366, "step": 20569 }, { "epoch": 0.7366555052196179, "grad_norm": 1.5418617725372314, "learning_rate": 3.421798270885319e-05, "loss": 1.2398, "step": 20570 }, { "epoch": 0.7366913173491861, "grad_norm": 1.4444352388381958, "learning_rate": 3.420924706450711e-05, "loss": 1.5176, "step": 20571 }, { "epoch": 0.7367271294787544, "grad_norm": 1.4469548463821411, "learning_rate": 3.4200512305290764e-05, "loss": 1.4863, "step": 20572 }, { "epoch": 0.7367629416083228, "grad_norm": 1.415252923965454, "learning_rate": 3.419177843132162e-05, "loss": 1.3998, "step": 20573 }, { "epoch": 0.736798753737891, "grad_norm": 2.118942975997925, "learning_rate": 3.418304544271721e-05, "loss": 1.3851, "step": 20574 }, { "epoch": 0.7368345658674593, "grad_norm": 1.5836764574050903, "learning_rate": 3.417431333959503e-05, "loss": 1.4488, "step": 20575 }, { "epoch": 0.7368703779970276, "grad_norm": 1.3505635261535645, "learning_rate": 3.4165582122072594e-05, "loss": 1.4246, "step": 20576 }, { "epoch": 0.7369061901265959, "grad_norm": 1.835097312927246, "learning_rate": 3.4156851790267283e-05, "loss": 1.3717, "step": 20577 }, { "epoch": 0.7369420022561641, "grad_norm": 1.7995364665985107, "learning_rate": 3.4148122344296605e-05, "loss": 1.7411, "step": 20578 }, { "epoch": 0.7369778143857324, "grad_norm": 1.40411376953125, "learning_rate": 3.413939378427804e-05, "loss": 1.227, "step": 20579 }, { "epoch": 0.7370136265153008, "grad_norm": 1.34280526638031, "learning_rate": 3.413066611032894e-05, "loss": 1.7086, "step": 20580 }, { "epoch": 0.737049438644869, "grad_norm": 1.728757381439209, "learning_rate": 3.412193932256675e-05, "loss": 1.5762, "step": 20581 }, { "epoch": 0.7370852507744373, "grad_norm": 1.793157935142517, "learning_rate": 3.41132134211089e-05, "loss": 1.6236, "step": 20582 }, { "epoch": 0.7371210629040056, "grad_norm": 1.6262106895446777, "learning_rate": 3.410448840607281e-05, "loss": 1.2699, "step": 20583 }, { "epoch": 0.7371568750335739, "grad_norm": 1.7049390077590942, "learning_rate": 3.4095764277575795e-05, "loss": 1.3049, "step": 20584 }, { "epoch": 0.7371926871631421, "grad_norm": 1.4077553749084473, "learning_rate": 3.4087041035735256e-05, "loss": 1.4297, "step": 20585 }, { "epoch": 0.7372284992927104, "grad_norm": 2.50809383392334, "learning_rate": 3.40783186806686e-05, "loss": 1.3544, "step": 20586 }, { "epoch": 0.7372643114222788, "grad_norm": 1.5117441415786743, "learning_rate": 3.406959721249309e-05, "loss": 1.3596, "step": 20587 }, { "epoch": 0.737300123551847, "grad_norm": 1.937361240386963, "learning_rate": 3.406087663132611e-05, "loss": 1.3741, "step": 20588 }, { "epoch": 0.7373359356814153, "grad_norm": 1.7514429092407227, "learning_rate": 3.4052156937284984e-05, "loss": 1.0765, "step": 20589 }, { "epoch": 0.7373717478109836, "grad_norm": 1.5857789516448975, "learning_rate": 3.404343813048705e-05, "loss": 1.1952, "step": 20590 }, { "epoch": 0.7374075599405518, "grad_norm": 1.6648368835449219, "learning_rate": 3.4034720211049544e-05, "loss": 1.5579, "step": 20591 }, { "epoch": 0.7374433720701201, "grad_norm": 1.51568603515625, "learning_rate": 3.402600317908978e-05, "loss": 1.0948, "step": 20592 }, { "epoch": 0.7374791841996884, "grad_norm": 2.056521415710449, "learning_rate": 3.401728703472505e-05, "loss": 1.2874, "step": 20593 }, { "epoch": 0.7375149963292568, "grad_norm": 1.7449597120285034, "learning_rate": 3.400857177807265e-05, "loss": 1.0096, "step": 20594 }, { "epoch": 0.737550808458825, "grad_norm": 3.0433919429779053, "learning_rate": 3.399985740924976e-05, "loss": 1.276, "step": 20595 }, { "epoch": 0.7375866205883933, "grad_norm": 2.3665289878845215, "learning_rate": 3.399114392837365e-05, "loss": 1.1514, "step": 20596 }, { "epoch": 0.7376224327179616, "grad_norm": 1.5721759796142578, "learning_rate": 3.3982431335561596e-05, "loss": 1.3701, "step": 20597 }, { "epoch": 0.7376582448475298, "grad_norm": 1.6225780248641968, "learning_rate": 3.397371963093072e-05, "loss": 1.0813, "step": 20598 }, { "epoch": 0.7376940569770981, "grad_norm": 1.3954616785049438, "learning_rate": 3.39650088145983e-05, "loss": 1.326, "step": 20599 }, { "epoch": 0.7377298691066664, "grad_norm": 1.427741289138794, "learning_rate": 3.3956298886681496e-05, "loss": 1.2924, "step": 20600 }, { "epoch": 0.7377656812362348, "grad_norm": 2.5152885913848877, "learning_rate": 3.3947589847297537e-05, "loss": 1.5709, "step": 20601 }, { "epoch": 0.737801493365803, "grad_norm": 1.531655192375183, "learning_rate": 3.393888169656351e-05, "loss": 1.6402, "step": 20602 }, { "epoch": 0.7378373054953713, "grad_norm": 1.998115062713623, "learning_rate": 3.393017443459663e-05, "loss": 1.1815, "step": 20603 }, { "epoch": 0.7378731176249396, "grad_norm": 1.5857163667678833, "learning_rate": 3.392146806151405e-05, "loss": 1.403, "step": 20604 }, { "epoch": 0.7379089297545078, "grad_norm": 1.4481664896011353, "learning_rate": 3.3912762577432864e-05, "loss": 1.3232, "step": 20605 }, { "epoch": 0.7379447418840761, "grad_norm": 1.4353240728378296, "learning_rate": 3.3904057982470204e-05, "loss": 1.5147, "step": 20606 }, { "epoch": 0.7379805540136444, "grad_norm": 2.2608537673950195, "learning_rate": 3.389535427674318e-05, "loss": 1.2307, "step": 20607 }, { "epoch": 0.7380163661432128, "grad_norm": 1.633110761642456, "learning_rate": 3.3886651460368934e-05, "loss": 1.3472, "step": 20608 }, { "epoch": 0.738052178272781, "grad_norm": 1.5929673910140991, "learning_rate": 3.3877949533464485e-05, "loss": 1.7282, "step": 20609 }, { "epoch": 0.7380879904023493, "grad_norm": 1.4499571323394775, "learning_rate": 3.3869248496146935e-05, "loss": 1.5401, "step": 20610 }, { "epoch": 0.7381238025319176, "grad_norm": 2.3304126262664795, "learning_rate": 3.3860548348533326e-05, "loss": 1.6311, "step": 20611 }, { "epoch": 0.7381596146614858, "grad_norm": 1.2469449043273926, "learning_rate": 3.385184909074077e-05, "loss": 1.4746, "step": 20612 }, { "epoch": 0.7381954267910541, "grad_norm": 1.6836285591125488, "learning_rate": 3.384315072288626e-05, "loss": 1.4823, "step": 20613 }, { "epoch": 0.7382312389206224, "grad_norm": 2.1215879917144775, "learning_rate": 3.383445324508676e-05, "loss": 1.3305, "step": 20614 }, { "epoch": 0.7382670510501907, "grad_norm": 1.1666394472122192, "learning_rate": 3.382575665745941e-05, "loss": 0.8717, "step": 20615 }, { "epoch": 0.738302863179759, "grad_norm": 1.4922236204147339, "learning_rate": 3.3817060960121105e-05, "loss": 1.6702, "step": 20616 }, { "epoch": 0.7383386753093273, "grad_norm": 1.494726300239563, "learning_rate": 3.380836615318891e-05, "loss": 1.3197, "step": 20617 }, { "epoch": 0.7383744874388956, "grad_norm": 1.798538327217102, "learning_rate": 3.37996722367797e-05, "loss": 1.4822, "step": 20618 }, { "epoch": 0.7384102995684638, "grad_norm": 2.2261242866516113, "learning_rate": 3.3790979211010576e-05, "loss": 1.676, "step": 20619 }, { "epoch": 0.7384461116980321, "grad_norm": 1.9397705793380737, "learning_rate": 3.3782287075998386e-05, "loss": 1.452, "step": 20620 }, { "epoch": 0.7384819238276004, "grad_norm": 2.365359306335449, "learning_rate": 3.377359583186012e-05, "loss": 1.4546, "step": 20621 }, { "epoch": 0.7385177359571687, "grad_norm": 1.9693297147750854, "learning_rate": 3.376490547871272e-05, "loss": 1.6731, "step": 20622 }, { "epoch": 0.738553548086737, "grad_norm": 1.434880018234253, "learning_rate": 3.375621601667305e-05, "loss": 1.2206, "step": 20623 }, { "epoch": 0.7385893602163053, "grad_norm": 1.6470460891723633, "learning_rate": 3.3747527445858074e-05, "loss": 1.6089, "step": 20624 }, { "epoch": 0.7386251723458735, "grad_norm": 1.7691365480422974, "learning_rate": 3.373883976638459e-05, "loss": 1.1974, "step": 20625 }, { "epoch": 0.7386609844754418, "grad_norm": 1.4882787466049194, "learning_rate": 3.3730152978369614e-05, "loss": 1.5165, "step": 20626 }, { "epoch": 0.7386967966050101, "grad_norm": 1.4077999591827393, "learning_rate": 3.3721467081929914e-05, "loss": 1.0703, "step": 20627 }, { "epoch": 0.7387326087345784, "grad_norm": 1.4659085273742676, "learning_rate": 3.371278207718241e-05, "loss": 1.2775, "step": 20628 }, { "epoch": 0.7387684208641467, "grad_norm": 1.8654396533966064, "learning_rate": 3.370409796424386e-05, "loss": 1.6343, "step": 20629 }, { "epoch": 0.738804232993715, "grad_norm": 1.9576849937438965, "learning_rate": 3.369541474323122e-05, "loss": 1.2882, "step": 20630 }, { "epoch": 0.7388400451232833, "grad_norm": 1.4226261377334595, "learning_rate": 3.3686732414261254e-05, "loss": 1.4776, "step": 20631 }, { "epoch": 0.7388758572528515, "grad_norm": 1.3740144968032837, "learning_rate": 3.367805097745069e-05, "loss": 1.4068, "step": 20632 }, { "epoch": 0.7389116693824198, "grad_norm": 1.6114263534545898, "learning_rate": 3.366937043291648e-05, "loss": 1.3795, "step": 20633 }, { "epoch": 0.7389474815119881, "grad_norm": 2.5558178424835205, "learning_rate": 3.3660690780775286e-05, "loss": 1.7663, "step": 20634 }, { "epoch": 0.7389832936415563, "grad_norm": 1.6846691370010376, "learning_rate": 3.3652012021143964e-05, "loss": 1.6188, "step": 20635 }, { "epoch": 0.7390191057711247, "grad_norm": 1.8610543012619019, "learning_rate": 3.364333415413917e-05, "loss": 1.2382, "step": 20636 }, { "epoch": 0.739054917900693, "grad_norm": 1.7064975500106812, "learning_rate": 3.363465717987778e-05, "loss": 1.6473, "step": 20637 }, { "epoch": 0.7390907300302613, "grad_norm": 1.726022720336914, "learning_rate": 3.3625981098476444e-05, "loss": 1.4555, "step": 20638 }, { "epoch": 0.7391265421598295, "grad_norm": 1.8379038572311401, "learning_rate": 3.3617305910051956e-05, "loss": 1.499, "step": 20639 }, { "epoch": 0.7391623542893978, "grad_norm": 1.7083884477615356, "learning_rate": 3.3608631614720955e-05, "loss": 1.7173, "step": 20640 }, { "epoch": 0.7391981664189661, "grad_norm": 1.3180886507034302, "learning_rate": 3.359995821260017e-05, "loss": 1.41, "step": 20641 }, { "epoch": 0.7392339785485343, "grad_norm": 1.614916443824768, "learning_rate": 3.359128570380633e-05, "loss": 1.2589, "step": 20642 }, { "epoch": 0.7392697906781027, "grad_norm": 1.9553498029708862, "learning_rate": 3.3582614088456055e-05, "loss": 1.4967, "step": 20643 }, { "epoch": 0.739305602807671, "grad_norm": 1.4962507486343384, "learning_rate": 3.3573943366666026e-05, "loss": 1.3142, "step": 20644 }, { "epoch": 0.7393414149372393, "grad_norm": 1.409155249595642, "learning_rate": 3.356527353855291e-05, "loss": 1.4818, "step": 20645 }, { "epoch": 0.7393772270668075, "grad_norm": 1.665363073348999, "learning_rate": 3.355660460423338e-05, "loss": 1.1948, "step": 20646 }, { "epoch": 0.7394130391963758, "grad_norm": 1.674485445022583, "learning_rate": 3.354793656382399e-05, "loss": 1.4648, "step": 20647 }, { "epoch": 0.7394488513259441, "grad_norm": 1.315306305885315, "learning_rate": 3.35392694174414e-05, "loss": 1.4364, "step": 20648 }, { "epoch": 0.7394846634555123, "grad_norm": 1.6919763088226318, "learning_rate": 3.3530603165202245e-05, "loss": 1.3387, "step": 20649 }, { "epoch": 0.7395204755850807, "grad_norm": 1.8464726209640503, "learning_rate": 3.352193780722306e-05, "loss": 1.4225, "step": 20650 }, { "epoch": 0.739556287714649, "grad_norm": 1.7736725807189941, "learning_rate": 3.351327334362043e-05, "loss": 1.352, "step": 20651 }, { "epoch": 0.7395920998442173, "grad_norm": 1.6074994802474976, "learning_rate": 3.3504609774510964e-05, "loss": 1.3402, "step": 20652 }, { "epoch": 0.7396279119737855, "grad_norm": 1.3930764198303223, "learning_rate": 3.349594710001123e-05, "loss": 1.3765, "step": 20653 }, { "epoch": 0.7396637241033538, "grad_norm": 1.6473559141159058, "learning_rate": 3.3487285320237705e-05, "loss": 1.2232, "step": 20654 }, { "epoch": 0.7396995362329221, "grad_norm": 1.8392513990402222, "learning_rate": 3.347862443530697e-05, "loss": 1.4693, "step": 20655 }, { "epoch": 0.7397353483624903, "grad_norm": 1.4013627767562866, "learning_rate": 3.3469964445335566e-05, "loss": 1.1409, "step": 20656 }, { "epoch": 0.7397711604920587, "grad_norm": 1.6355818510055542, "learning_rate": 3.346130535043993e-05, "loss": 1.2311, "step": 20657 }, { "epoch": 0.739806972621627, "grad_norm": 1.5286083221435547, "learning_rate": 3.3452647150736615e-05, "loss": 1.3021, "step": 20658 }, { "epoch": 0.7398427847511952, "grad_norm": 1.5414046049118042, "learning_rate": 3.3443989846342084e-05, "loss": 1.1697, "step": 20659 }, { "epoch": 0.7398785968807635, "grad_norm": 1.3940845727920532, "learning_rate": 3.3435333437372854e-05, "loss": 1.5052, "step": 20660 }, { "epoch": 0.7399144090103318, "grad_norm": 1.6988524198532104, "learning_rate": 3.3426677923945314e-05, "loss": 1.4081, "step": 20661 }, { "epoch": 0.7399502211399, "grad_norm": 1.670135498046875, "learning_rate": 3.341802330617596e-05, "loss": 1.4241, "step": 20662 }, { "epoch": 0.7399860332694683, "grad_norm": 1.8642773628234863, "learning_rate": 3.3409369584181216e-05, "loss": 1.4035, "step": 20663 }, { "epoch": 0.7400218453990367, "grad_norm": 1.446311593055725, "learning_rate": 3.340071675807753e-05, "loss": 1.6849, "step": 20664 }, { "epoch": 0.740057657528605, "grad_norm": 1.5131405591964722, "learning_rate": 3.3392064827981275e-05, "loss": 1.6785, "step": 20665 }, { "epoch": 0.7400934696581732, "grad_norm": 2.638115406036377, "learning_rate": 3.338341379400885e-05, "loss": 1.3795, "step": 20666 }, { "epoch": 0.7401292817877415, "grad_norm": 1.5250235795974731, "learning_rate": 3.337476365627672e-05, "loss": 1.421, "step": 20667 }, { "epoch": 0.7401650939173098, "grad_norm": 1.3569045066833496, "learning_rate": 3.336611441490115e-05, "loss": 1.533, "step": 20668 }, { "epoch": 0.740200906046878, "grad_norm": 1.659124493598938, "learning_rate": 3.335746606999858e-05, "loss": 1.3057, "step": 20669 }, { "epoch": 0.7402367181764463, "grad_norm": 1.662147045135498, "learning_rate": 3.334881862168532e-05, "loss": 1.3973, "step": 20670 }, { "epoch": 0.7402725303060147, "grad_norm": 1.852306604385376, "learning_rate": 3.334017207007778e-05, "loss": 1.5306, "step": 20671 }, { "epoch": 0.740308342435583, "grad_norm": 1.557503581047058, "learning_rate": 3.33315264152922e-05, "loss": 1.2448, "step": 20672 }, { "epoch": 0.7403441545651512, "grad_norm": 2.1327314376831055, "learning_rate": 3.332288165744494e-05, "loss": 1.3542, "step": 20673 }, { "epoch": 0.7403799666947195, "grad_norm": 1.6384520530700684, "learning_rate": 3.3314237796652324e-05, "loss": 1.7015, "step": 20674 }, { "epoch": 0.7404157788242878, "grad_norm": 1.7940348386764526, "learning_rate": 3.33055948330306e-05, "loss": 1.4875, "step": 20675 }, { "epoch": 0.740451590953856, "grad_norm": 2.3545427322387695, "learning_rate": 3.329695276669605e-05, "loss": 1.3333, "step": 20676 }, { "epoch": 0.7404874030834243, "grad_norm": 1.4605990648269653, "learning_rate": 3.3288311597764976e-05, "loss": 1.713, "step": 20677 }, { "epoch": 0.7405232152129927, "grad_norm": 2.598592758178711, "learning_rate": 3.327967132635364e-05, "loss": 1.4987, "step": 20678 }, { "epoch": 0.740559027342561, "grad_norm": 1.3932163715362549, "learning_rate": 3.3271031952578245e-05, "loss": 1.2992, "step": 20679 }, { "epoch": 0.7405948394721292, "grad_norm": 2.0070300102233887, "learning_rate": 3.326239347655503e-05, "loss": 1.5381, "step": 20680 }, { "epoch": 0.7406306516016975, "grad_norm": 1.4914416074752808, "learning_rate": 3.325375589840023e-05, "loss": 1.4634, "step": 20681 }, { "epoch": 0.7406664637312658, "grad_norm": 1.81553053855896, "learning_rate": 3.3245119218230066e-05, "loss": 1.3835, "step": 20682 }, { "epoch": 0.740702275860834, "grad_norm": 1.629849910736084, "learning_rate": 3.32364834361607e-05, "loss": 1.4646, "step": 20683 }, { "epoch": 0.7407380879904023, "grad_norm": 1.6208226680755615, "learning_rate": 3.3227848552308326e-05, "loss": 1.4159, "step": 20684 }, { "epoch": 0.7407739001199707, "grad_norm": 1.9516637325286865, "learning_rate": 3.321921456678915e-05, "loss": 1.3905, "step": 20685 }, { "epoch": 0.740809712249539, "grad_norm": 1.5003976821899414, "learning_rate": 3.321058147971927e-05, "loss": 1.7165, "step": 20686 }, { "epoch": 0.7408455243791072, "grad_norm": 1.5799978971481323, "learning_rate": 3.320194929121486e-05, "loss": 1.6016, "step": 20687 }, { "epoch": 0.7408813365086755, "grad_norm": 1.946989893913269, "learning_rate": 3.319331800139207e-05, "loss": 1.8459, "step": 20688 }, { "epoch": 0.7409171486382438, "grad_norm": 1.4369703531265259, "learning_rate": 3.318468761036704e-05, "loss": 1.5164, "step": 20689 }, { "epoch": 0.740952960767812, "grad_norm": 1.370896339416504, "learning_rate": 3.3176058118255816e-05, "loss": 1.4781, "step": 20690 }, { "epoch": 0.7409887728973803, "grad_norm": 1.3151863813400269, "learning_rate": 3.316742952517453e-05, "loss": 1.3523, "step": 20691 }, { "epoch": 0.7410245850269487, "grad_norm": 1.8912273645401, "learning_rate": 3.3158801831239314e-05, "loss": 1.345, "step": 20692 }, { "epoch": 0.741060397156517, "grad_norm": 1.3911374807357788, "learning_rate": 3.3150175036566166e-05, "loss": 1.6394, "step": 20693 }, { "epoch": 0.7410962092860852, "grad_norm": 1.4584274291992188, "learning_rate": 3.314154914127118e-05, "loss": 1.3457, "step": 20694 }, { "epoch": 0.7411320214156535, "grad_norm": 1.4885680675506592, "learning_rate": 3.31329241454704e-05, "loss": 1.709, "step": 20695 }, { "epoch": 0.7411678335452218, "grad_norm": 2.0191259384155273, "learning_rate": 3.312430004927992e-05, "loss": 1.3875, "step": 20696 }, { "epoch": 0.74120364567479, "grad_norm": 2.2317590713500977, "learning_rate": 3.311567685281568e-05, "loss": 1.6509, "step": 20697 }, { "epoch": 0.7412394578043583, "grad_norm": 1.3376753330230713, "learning_rate": 3.310705455619374e-05, "loss": 1.3804, "step": 20698 }, { "epoch": 0.7412752699339267, "grad_norm": 1.5647464990615845, "learning_rate": 3.309843315953008e-05, "loss": 1.2623, "step": 20699 }, { "epoch": 0.7413110820634949, "grad_norm": 1.6733916997909546, "learning_rate": 3.3089812662940754e-05, "loss": 1.1969, "step": 20700 }, { "epoch": 0.7413468941930632, "grad_norm": 1.6514251232147217, "learning_rate": 3.308119306654168e-05, "loss": 1.2187, "step": 20701 }, { "epoch": 0.7413827063226315, "grad_norm": 1.6864891052246094, "learning_rate": 3.3072574370448783e-05, "loss": 1.5163, "step": 20702 }, { "epoch": 0.7414185184521997, "grad_norm": 1.344460129737854, "learning_rate": 3.306395657477812e-05, "loss": 1.4425, "step": 20703 }, { "epoch": 0.741454330581768, "grad_norm": 1.5931432247161865, "learning_rate": 3.3055339679645544e-05, "loss": 1.5247, "step": 20704 }, { "epoch": 0.7414901427113363, "grad_norm": 2.037649631500244, "learning_rate": 3.304672368516704e-05, "loss": 1.5746, "step": 20705 }, { "epoch": 0.7415259548409047, "grad_norm": 1.2448827028274536, "learning_rate": 3.303810859145848e-05, "loss": 1.4724, "step": 20706 }, { "epoch": 0.7415617669704729, "grad_norm": 1.5991346836090088, "learning_rate": 3.302949439863584e-05, "loss": 1.6548, "step": 20707 }, { "epoch": 0.7415975791000412, "grad_norm": 1.6858115196228027, "learning_rate": 3.3020881106814936e-05, "loss": 1.6506, "step": 20708 }, { "epoch": 0.7416333912296095, "grad_norm": 1.7268314361572266, "learning_rate": 3.301226871611168e-05, "loss": 1.278, "step": 20709 }, { "epoch": 0.7416692033591777, "grad_norm": 1.3854961395263672, "learning_rate": 3.3003657226641974e-05, "loss": 1.3806, "step": 20710 }, { "epoch": 0.741705015488746, "grad_norm": 1.8541377782821655, "learning_rate": 3.2995046638521595e-05, "loss": 1.0329, "step": 20711 }, { "epoch": 0.7417408276183143, "grad_norm": 1.6589943170547485, "learning_rate": 3.2986436951866486e-05, "loss": 1.3059, "step": 20712 }, { "epoch": 0.7417766397478827, "grad_norm": 1.4660264253616333, "learning_rate": 3.2977828166792345e-05, "loss": 1.7456, "step": 20713 }, { "epoch": 0.7418124518774509, "grad_norm": 1.5078004598617554, "learning_rate": 3.296922028341515e-05, "loss": 1.3345, "step": 20714 }, { "epoch": 0.7418482640070192, "grad_norm": 1.6876119375228882, "learning_rate": 3.29606133018506e-05, "loss": 1.5791, "step": 20715 }, { "epoch": 0.7418840761365875, "grad_norm": 1.7531828880310059, "learning_rate": 3.2952007222214545e-05, "loss": 1.1627, "step": 20716 }, { "epoch": 0.7419198882661557, "grad_norm": 3.027247667312622, "learning_rate": 3.29434020446227e-05, "loss": 1.1017, "step": 20717 }, { "epoch": 0.741955700395724, "grad_norm": 1.8014774322509766, "learning_rate": 3.293479776919093e-05, "loss": 1.4673, "step": 20718 }, { "epoch": 0.7419915125252923, "grad_norm": 1.9687037467956543, "learning_rate": 3.292619439603495e-05, "loss": 1.5301, "step": 20719 }, { "epoch": 0.7420273246548607, "grad_norm": 1.6570146083831787, "learning_rate": 3.291759192527045e-05, "loss": 1.5799, "step": 20720 }, { "epoch": 0.7420631367844289, "grad_norm": 1.3398667573928833, "learning_rate": 3.290899035701328e-05, "loss": 1.5945, "step": 20721 }, { "epoch": 0.7420989489139972, "grad_norm": 1.6556895971298218, "learning_rate": 3.2900389691379074e-05, "loss": 1.3598, "step": 20722 }, { "epoch": 0.7421347610435655, "grad_norm": 1.4631439447402954, "learning_rate": 3.2891789928483594e-05, "loss": 1.1738, "step": 20723 }, { "epoch": 0.7421705731731337, "grad_norm": 1.3914145231246948, "learning_rate": 3.2883191068442464e-05, "loss": 1.4977, "step": 20724 }, { "epoch": 0.742206385302702, "grad_norm": 1.4877334833145142, "learning_rate": 3.287459311137149e-05, "loss": 1.7173, "step": 20725 }, { "epoch": 0.7422421974322703, "grad_norm": 1.5080418586730957, "learning_rate": 3.286599605738624e-05, "loss": 1.361, "step": 20726 }, { "epoch": 0.7422780095618386, "grad_norm": 1.6488600969314575, "learning_rate": 3.285739990660246e-05, "loss": 1.0568, "step": 20727 }, { "epoch": 0.7423138216914069, "grad_norm": 1.9895234107971191, "learning_rate": 3.284880465913571e-05, "loss": 1.6501, "step": 20728 }, { "epoch": 0.7423496338209752, "grad_norm": 1.7913322448730469, "learning_rate": 3.284021031510168e-05, "loss": 1.395, "step": 20729 }, { "epoch": 0.7423854459505435, "grad_norm": 1.5944781303405762, "learning_rate": 3.2831616874616036e-05, "loss": 1.5749, "step": 20730 }, { "epoch": 0.7424212580801117, "grad_norm": 1.548054814338684, "learning_rate": 3.282302433779426e-05, "loss": 1.6684, "step": 20731 }, { "epoch": 0.74245707020968, "grad_norm": 1.558003306388855, "learning_rate": 3.281443270475212e-05, "loss": 1.3898, "step": 20732 }, { "epoch": 0.7424928823392483, "grad_norm": 1.7061145305633545, "learning_rate": 3.280584197560508e-05, "loss": 1.7347, "step": 20733 }, { "epoch": 0.7425286944688166, "grad_norm": 1.6126638650894165, "learning_rate": 3.2797252150468804e-05, "loss": 1.5521, "step": 20734 }, { "epoch": 0.7425645065983849, "grad_norm": 1.6583861112594604, "learning_rate": 3.278866322945874e-05, "loss": 1.4871, "step": 20735 }, { "epoch": 0.7426003187279532, "grad_norm": 1.5712144374847412, "learning_rate": 3.278007521269059e-05, "loss": 1.5044, "step": 20736 }, { "epoch": 0.7426361308575214, "grad_norm": 2.0407989025115967, "learning_rate": 3.2771488100279814e-05, "loss": 1.4875, "step": 20737 }, { "epoch": 0.7426719429870897, "grad_norm": 1.7771286964416504, "learning_rate": 3.2762901892341926e-05, "loss": 1.2809, "step": 20738 }, { "epoch": 0.742707755116658, "grad_norm": 2.0323855876922607, "learning_rate": 3.2754316588992454e-05, "loss": 1.4569, "step": 20739 }, { "epoch": 0.7427435672462263, "grad_norm": 1.6384506225585938, "learning_rate": 3.274573219034691e-05, "loss": 1.7623, "step": 20740 }, { "epoch": 0.7427793793757946, "grad_norm": 1.7023667097091675, "learning_rate": 3.2737148696520824e-05, "loss": 1.3912, "step": 20741 }, { "epoch": 0.7428151915053629, "grad_norm": 1.7247306108474731, "learning_rate": 3.272856610762961e-05, "loss": 1.496, "step": 20742 }, { "epoch": 0.7428510036349312, "grad_norm": 1.7189574241638184, "learning_rate": 3.271998442378875e-05, "loss": 1.613, "step": 20743 }, { "epoch": 0.7428868157644994, "grad_norm": 2.2422940731048584, "learning_rate": 3.271140364511377e-05, "loss": 1.7311, "step": 20744 }, { "epoch": 0.7429226278940677, "grad_norm": 1.8080083131790161, "learning_rate": 3.270282377172001e-05, "loss": 1.3932, "step": 20745 }, { "epoch": 0.742958440023636, "grad_norm": 1.682898759841919, "learning_rate": 3.269424480372295e-05, "loss": 1.6761, "step": 20746 }, { "epoch": 0.7429942521532042, "grad_norm": 1.3644120693206787, "learning_rate": 3.268566674123802e-05, "loss": 1.4248, "step": 20747 }, { "epoch": 0.7430300642827726, "grad_norm": 2.0592284202575684, "learning_rate": 3.267708958438063e-05, "loss": 1.3517, "step": 20748 }, { "epoch": 0.7430658764123409, "grad_norm": 1.3781828880310059, "learning_rate": 3.266851333326614e-05, "loss": 1.4983, "step": 20749 }, { "epoch": 0.7431016885419092, "grad_norm": 1.7163243293762207, "learning_rate": 3.265993798800995e-05, "loss": 1.0821, "step": 20750 }, { "epoch": 0.7431375006714774, "grad_norm": 1.565303087234497, "learning_rate": 3.265136354872742e-05, "loss": 1.3198, "step": 20751 }, { "epoch": 0.7431733128010457, "grad_norm": 1.516454815864563, "learning_rate": 3.2642790015533965e-05, "loss": 1.1464, "step": 20752 }, { "epoch": 0.743209124930614, "grad_norm": 1.5399000644683838, "learning_rate": 3.2634217388544855e-05, "loss": 1.6111, "step": 20753 }, { "epoch": 0.7432449370601822, "grad_norm": 1.608729600906372, "learning_rate": 3.2625645667875434e-05, "loss": 1.4164, "step": 20754 }, { "epoch": 0.7432807491897506, "grad_norm": 1.7107634544372559, "learning_rate": 3.26170748536411e-05, "loss": 1.4349, "step": 20755 }, { "epoch": 0.7433165613193189, "grad_norm": 1.5795230865478516, "learning_rate": 3.260850494595707e-05, "loss": 1.3175, "step": 20756 }, { "epoch": 0.7433523734488872, "grad_norm": 1.8577889204025269, "learning_rate": 3.259993594493866e-05, "loss": 1.6718, "step": 20757 }, { "epoch": 0.7433881855784554, "grad_norm": 1.8486729860305786, "learning_rate": 3.2591367850701194e-05, "loss": 1.424, "step": 20758 }, { "epoch": 0.7434239977080237, "grad_norm": 1.7383266687393188, "learning_rate": 3.2582800663359933e-05, "loss": 1.584, "step": 20759 }, { "epoch": 0.743459809837592, "grad_norm": 1.9045100212097168, "learning_rate": 3.257423438303011e-05, "loss": 1.4333, "step": 20760 }, { "epoch": 0.7434956219671602, "grad_norm": 1.3774325847625732, "learning_rate": 3.256566900982699e-05, "loss": 1.4165, "step": 20761 }, { "epoch": 0.7435314340967286, "grad_norm": 1.4504196643829346, "learning_rate": 3.255710454386585e-05, "loss": 1.3648, "step": 20762 }, { "epoch": 0.7435672462262969, "grad_norm": 1.3335399627685547, "learning_rate": 3.2548540985261824e-05, "loss": 1.5259, "step": 20763 }, { "epoch": 0.7436030583558652, "grad_norm": 1.8252145051956177, "learning_rate": 3.2539978334130174e-05, "loss": 1.2774, "step": 20764 }, { "epoch": 0.7436388704854334, "grad_norm": 1.58820378780365, "learning_rate": 3.253141659058611e-05, "loss": 1.4351, "step": 20765 }, { "epoch": 0.7436746826150017, "grad_norm": 1.7370541095733643, "learning_rate": 3.252285575474483e-05, "loss": 1.6168, "step": 20766 }, { "epoch": 0.74371049474457, "grad_norm": 2.1864709854125977, "learning_rate": 3.251429582672145e-05, "loss": 2.0227, "step": 20767 }, { "epoch": 0.7437463068741382, "grad_norm": 2.4286386966705322, "learning_rate": 3.2505736806631185e-05, "loss": 1.8233, "step": 20768 }, { "epoch": 0.7437821190037066, "grad_norm": 1.746517300605774, "learning_rate": 3.249717869458916e-05, "loss": 1.2833, "step": 20769 }, { "epoch": 0.7438179311332749, "grad_norm": 1.7618461847305298, "learning_rate": 3.248862149071056e-05, "loss": 1.5458, "step": 20770 }, { "epoch": 0.7438537432628431, "grad_norm": 1.8682754039764404, "learning_rate": 3.248006519511043e-05, "loss": 1.6408, "step": 20771 }, { "epoch": 0.7438895553924114, "grad_norm": 1.6848514080047607, "learning_rate": 3.247150980790394e-05, "loss": 1.5804, "step": 20772 }, { "epoch": 0.7439253675219797, "grad_norm": 1.2802892923355103, "learning_rate": 3.2462955329206213e-05, "loss": 1.3587, "step": 20773 }, { "epoch": 0.743961179651548, "grad_norm": 1.8104581832885742, "learning_rate": 3.245440175913227e-05, "loss": 1.4791, "step": 20774 }, { "epoch": 0.7439969917811162, "grad_norm": 1.7056621313095093, "learning_rate": 3.244584909779722e-05, "loss": 1.6028, "step": 20775 }, { "epoch": 0.7440328039106846, "grad_norm": 2.1532299518585205, "learning_rate": 3.243729734531614e-05, "loss": 1.3802, "step": 20776 }, { "epoch": 0.7440686160402529, "grad_norm": 1.530900239944458, "learning_rate": 3.2428746501804106e-05, "loss": 1.5308, "step": 20777 }, { "epoch": 0.7441044281698211, "grad_norm": 1.56569242477417, "learning_rate": 3.2420196567376096e-05, "loss": 1.538, "step": 20778 }, { "epoch": 0.7441402402993894, "grad_norm": 1.6894400119781494, "learning_rate": 3.241164754214716e-05, "loss": 1.3194, "step": 20779 }, { "epoch": 0.7441760524289577, "grad_norm": 2.2958922386169434, "learning_rate": 3.2403099426232365e-05, "loss": 1.4457, "step": 20780 }, { "epoch": 0.744211864558526, "grad_norm": 1.536910057067871, "learning_rate": 3.239455221974663e-05, "loss": 0.9383, "step": 20781 }, { "epoch": 0.7442476766880942, "grad_norm": 1.4549648761749268, "learning_rate": 3.2386005922804996e-05, "loss": 1.4439, "step": 20782 }, { "epoch": 0.7442834888176626, "grad_norm": 1.574837327003479, "learning_rate": 3.237746053552244e-05, "loss": 1.3761, "step": 20783 }, { "epoch": 0.7443193009472309, "grad_norm": 1.8946826457977295, "learning_rate": 3.2368916058013956e-05, "loss": 1.4469, "step": 20784 }, { "epoch": 0.7443551130767991, "grad_norm": 2.358954906463623, "learning_rate": 3.236037249039444e-05, "loss": 1.6472, "step": 20785 }, { "epoch": 0.7443909252063674, "grad_norm": 1.663940668106079, "learning_rate": 3.235182983277886e-05, "loss": 1.4214, "step": 20786 }, { "epoch": 0.7444267373359357, "grad_norm": 1.7048293352127075, "learning_rate": 3.234328808528215e-05, "loss": 1.4342, "step": 20787 }, { "epoch": 0.7444625494655039, "grad_norm": 1.6530091762542725, "learning_rate": 3.233474724801926e-05, "loss": 1.5719, "step": 20788 }, { "epoch": 0.7444983615950722, "grad_norm": 1.466918706893921, "learning_rate": 3.232620732110503e-05, "loss": 1.3972, "step": 20789 }, { "epoch": 0.7445341737246406, "grad_norm": 1.7518681287765503, "learning_rate": 3.231766830465439e-05, "loss": 1.5575, "step": 20790 }, { "epoch": 0.7445699858542089, "grad_norm": 1.4471641778945923, "learning_rate": 3.230913019878224e-05, "loss": 1.448, "step": 20791 }, { "epoch": 0.7446057979837771, "grad_norm": 1.9745919704437256, "learning_rate": 3.230059300360342e-05, "loss": 1.4811, "step": 20792 }, { "epoch": 0.7446416101133454, "grad_norm": 1.7885055541992188, "learning_rate": 3.229205671923278e-05, "loss": 1.5635, "step": 20793 }, { "epoch": 0.7446774222429137, "grad_norm": 1.5172529220581055, "learning_rate": 3.2283521345785176e-05, "loss": 1.5204, "step": 20794 }, { "epoch": 0.7447132343724819, "grad_norm": 1.5513358116149902, "learning_rate": 3.227498688337548e-05, "loss": 1.4141, "step": 20795 }, { "epoch": 0.7447490465020502, "grad_norm": 1.9045683145523071, "learning_rate": 3.226645333211845e-05, "loss": 1.4086, "step": 20796 }, { "epoch": 0.7447848586316185, "grad_norm": 1.5256716012954712, "learning_rate": 3.225792069212892e-05, "loss": 1.5432, "step": 20797 }, { "epoch": 0.7448206707611869, "grad_norm": 1.4963194131851196, "learning_rate": 3.224938896352171e-05, "loss": 1.1279, "step": 20798 }, { "epoch": 0.7448564828907551, "grad_norm": 1.3626344203948975, "learning_rate": 3.2240858146411546e-05, "loss": 1.1153, "step": 20799 }, { "epoch": 0.7448922950203234, "grad_norm": 1.44807767868042, "learning_rate": 3.2232328240913277e-05, "loss": 1.3652, "step": 20800 }, { "epoch": 0.7449281071498917, "grad_norm": 2.269850492477417, "learning_rate": 3.222379924714155e-05, "loss": 1.1314, "step": 20801 }, { "epoch": 0.7449639192794599, "grad_norm": 1.7084736824035645, "learning_rate": 3.221527116521124e-05, "loss": 1.5226, "step": 20802 }, { "epoch": 0.7449997314090282, "grad_norm": 2.213249444961548, "learning_rate": 3.220674399523699e-05, "loss": 1.3941, "step": 20803 }, { "epoch": 0.7450355435385965, "grad_norm": 1.5036464929580688, "learning_rate": 3.219821773733355e-05, "loss": 1.5555, "step": 20804 }, { "epoch": 0.7450713556681648, "grad_norm": 1.3128011226654053, "learning_rate": 3.218969239161563e-05, "loss": 1.3888, "step": 20805 }, { "epoch": 0.7451071677977331, "grad_norm": 1.5558034181594849, "learning_rate": 3.2181167958197964e-05, "loss": 1.2282, "step": 20806 }, { "epoch": 0.7451429799273014, "grad_norm": 2.2121920585632324, "learning_rate": 3.2172644437195207e-05, "loss": 1.733, "step": 20807 }, { "epoch": 0.7451787920568697, "grad_norm": 1.9016462564468384, "learning_rate": 3.216412182872196e-05, "loss": 1.4272, "step": 20808 }, { "epoch": 0.7452146041864379, "grad_norm": 1.629558801651001, "learning_rate": 3.215560013289301e-05, "loss": 1.5492, "step": 20809 }, { "epoch": 0.7452504163160062, "grad_norm": 3.6835665702819824, "learning_rate": 3.2147079349822925e-05, "loss": 1.5285, "step": 20810 }, { "epoch": 0.7452862284455745, "grad_norm": 1.4725559949874878, "learning_rate": 3.2138559479626395e-05, "loss": 1.4868, "step": 20811 }, { "epoch": 0.7453220405751428, "grad_norm": 1.4919462203979492, "learning_rate": 3.2130040522417946e-05, "loss": 1.5146, "step": 20812 }, { "epoch": 0.7453578527047111, "grad_norm": 1.767691731452942, "learning_rate": 3.212152247831233e-05, "loss": 1.2523, "step": 20813 }, { "epoch": 0.7453936648342794, "grad_norm": 1.4259493350982666, "learning_rate": 3.211300534742402e-05, "loss": 1.7543, "step": 20814 }, { "epoch": 0.7454294769638476, "grad_norm": 1.6164785623550415, "learning_rate": 3.210448912986767e-05, "loss": 1.5031, "step": 20815 }, { "epoch": 0.7454652890934159, "grad_norm": 1.4968827962875366, "learning_rate": 3.209597382575786e-05, "loss": 1.4044, "step": 20816 }, { "epoch": 0.7455011012229842, "grad_norm": 1.9254761934280396, "learning_rate": 3.208745943520911e-05, "loss": 0.9738, "step": 20817 }, { "epoch": 0.7455369133525525, "grad_norm": 1.466422438621521, "learning_rate": 3.207894595833603e-05, "loss": 1.3917, "step": 20818 }, { "epoch": 0.7455727254821208, "grad_norm": 1.6191308498382568, "learning_rate": 3.207043339525304e-05, "loss": 1.6631, "step": 20819 }, { "epoch": 0.7456085376116891, "grad_norm": 1.7039192914962769, "learning_rate": 3.206192174607482e-05, "loss": 1.4387, "step": 20820 }, { "epoch": 0.7456443497412574, "grad_norm": 1.9393994808197021, "learning_rate": 3.205341101091578e-05, "loss": 1.2903, "step": 20821 }, { "epoch": 0.7456801618708256, "grad_norm": 1.7326945066452026, "learning_rate": 3.2044901189890473e-05, "loss": 1.5565, "step": 20822 }, { "epoch": 0.7457159740003939, "grad_norm": 1.9713574647903442, "learning_rate": 3.2036392283113304e-05, "loss": 1.3975, "step": 20823 }, { "epoch": 0.7457517861299622, "grad_norm": 1.6577719449996948, "learning_rate": 3.202788429069887e-05, "loss": 1.582, "step": 20824 }, { "epoch": 0.7457875982595304, "grad_norm": 1.6279882192611694, "learning_rate": 3.201937721276159e-05, "loss": 1.5023, "step": 20825 }, { "epoch": 0.7458234103890988, "grad_norm": 2.0655462741851807, "learning_rate": 3.201087104941586e-05, "loss": 1.4759, "step": 20826 }, { "epoch": 0.7458592225186671, "grad_norm": 1.3762956857681274, "learning_rate": 3.2002365800776154e-05, "loss": 1.3902, "step": 20827 }, { "epoch": 0.7458950346482354, "grad_norm": 1.3251439332962036, "learning_rate": 3.199386146695691e-05, "loss": 1.4928, "step": 20828 }, { "epoch": 0.7459308467778036, "grad_norm": 1.480262041091919, "learning_rate": 3.1985358048072574e-05, "loss": 1.4624, "step": 20829 }, { "epoch": 0.7459666589073719, "grad_norm": 1.2799279689788818, "learning_rate": 3.197685554423745e-05, "loss": 1.3353, "step": 20830 }, { "epoch": 0.7460024710369402, "grad_norm": 1.4563822746276855, "learning_rate": 3.1968353955566045e-05, "loss": 1.0802, "step": 20831 }, { "epoch": 0.7460382831665084, "grad_norm": 1.2952502965927124, "learning_rate": 3.195985328217266e-05, "loss": 1.1515, "step": 20832 }, { "epoch": 0.7460740952960768, "grad_norm": 1.3360506296157837, "learning_rate": 3.1951353524171715e-05, "loss": 1.5667, "step": 20833 }, { "epoch": 0.7461099074256451, "grad_norm": 1.5215046405792236, "learning_rate": 3.194285468167749e-05, "loss": 1.424, "step": 20834 }, { "epoch": 0.7461457195552134, "grad_norm": 1.5855753421783447, "learning_rate": 3.1934356754804385e-05, "loss": 1.4095, "step": 20835 }, { "epoch": 0.7461815316847816, "grad_norm": 1.7662148475646973, "learning_rate": 3.192585974366673e-05, "loss": 1.4234, "step": 20836 }, { "epoch": 0.7462173438143499, "grad_norm": 1.916852593421936, "learning_rate": 3.19173636483788e-05, "loss": 1.4165, "step": 20837 }, { "epoch": 0.7462531559439182, "grad_norm": 1.7372554540634155, "learning_rate": 3.190886846905491e-05, "loss": 1.3308, "step": 20838 }, { "epoch": 0.7462889680734864, "grad_norm": 1.336731195449829, "learning_rate": 3.190037420580937e-05, "loss": 1.2971, "step": 20839 }, { "epoch": 0.7463247802030548, "grad_norm": 1.8577148914337158, "learning_rate": 3.1891880858756484e-05, "loss": 1.6954, "step": 20840 }, { "epoch": 0.7463605923326231, "grad_norm": 2.185137987136841, "learning_rate": 3.1883388428010465e-05, "loss": 1.6788, "step": 20841 }, { "epoch": 0.7463964044621914, "grad_norm": 1.7305773496627808, "learning_rate": 3.187489691368558e-05, "loss": 1.7874, "step": 20842 }, { "epoch": 0.7464322165917596, "grad_norm": 1.6137861013412476, "learning_rate": 3.186640631589611e-05, "loss": 1.2475, "step": 20843 }, { "epoch": 0.7464680287213279, "grad_norm": 1.5062600374221802, "learning_rate": 3.1857916634756234e-05, "loss": 1.2966, "step": 20844 }, { "epoch": 0.7465038408508962, "grad_norm": 1.881579875946045, "learning_rate": 3.184942787038019e-05, "loss": 1.6763, "step": 20845 }, { "epoch": 0.7465396529804644, "grad_norm": 1.7157776355743408, "learning_rate": 3.184094002288219e-05, "loss": 1.4101, "step": 20846 }, { "epoch": 0.7465754651100328, "grad_norm": 1.5331974029541016, "learning_rate": 3.1832453092376446e-05, "loss": 1.4136, "step": 20847 }, { "epoch": 0.7466112772396011, "grad_norm": 2.0889968872070312, "learning_rate": 3.182396707897709e-05, "loss": 1.2539, "step": 20848 }, { "epoch": 0.7466470893691693, "grad_norm": 1.3212164640426636, "learning_rate": 3.1815481982798324e-05, "loss": 1.3957, "step": 20849 }, { "epoch": 0.7466829014987376, "grad_norm": 1.5992352962493896, "learning_rate": 3.1806997803954316e-05, "loss": 1.2118, "step": 20850 }, { "epoch": 0.7467187136283059, "grad_norm": 1.8797727823257446, "learning_rate": 3.1798514542559164e-05, "loss": 1.2311, "step": 20851 }, { "epoch": 0.7467545257578742, "grad_norm": 1.4515434503555298, "learning_rate": 3.1790032198727014e-05, "loss": 1.4252, "step": 20852 }, { "epoch": 0.7467903378874424, "grad_norm": 1.5532227754592896, "learning_rate": 3.178155077257201e-05, "loss": 1.2506, "step": 20853 }, { "epoch": 0.7468261500170108, "grad_norm": 1.7346625328063965, "learning_rate": 3.177307026420827e-05, "loss": 1.4919, "step": 20854 }, { "epoch": 0.7468619621465791, "grad_norm": 1.302531361579895, "learning_rate": 3.176459067374984e-05, "loss": 1.3361, "step": 20855 }, { "epoch": 0.7468977742761473, "grad_norm": 2.094909191131592, "learning_rate": 3.175611200131081e-05, "loss": 1.3555, "step": 20856 }, { "epoch": 0.7469335864057156, "grad_norm": 1.6115424633026123, "learning_rate": 3.174763424700528e-05, "loss": 1.4763, "step": 20857 }, { "epoch": 0.7469693985352839, "grad_norm": 2.035444974899292, "learning_rate": 3.1739157410947316e-05, "loss": 1.6644, "step": 20858 }, { "epoch": 0.7470052106648521, "grad_norm": 1.8379755020141602, "learning_rate": 3.173068149325091e-05, "loss": 1.6804, "step": 20859 }, { "epoch": 0.7470410227944204, "grad_norm": 1.5692671537399292, "learning_rate": 3.172220649403011e-05, "loss": 1.5543, "step": 20860 }, { "epoch": 0.7470768349239888, "grad_norm": 1.5705647468566895, "learning_rate": 3.1713732413399e-05, "loss": 1.475, "step": 20861 }, { "epoch": 0.7471126470535571, "grad_norm": 1.777363896369934, "learning_rate": 3.1705259251471496e-05, "loss": 1.4334, "step": 20862 }, { "epoch": 0.7471484591831253, "grad_norm": 2.5444350242614746, "learning_rate": 3.169678700836164e-05, "loss": 1.3177, "step": 20863 }, { "epoch": 0.7471842713126936, "grad_norm": 2.386817216873169, "learning_rate": 3.168831568418341e-05, "loss": 1.6777, "step": 20864 }, { "epoch": 0.7472200834422619, "grad_norm": 1.4960191249847412, "learning_rate": 3.16798452790508e-05, "loss": 1.3594, "step": 20865 }, { "epoch": 0.7472558955718301, "grad_norm": 1.516856074333191, "learning_rate": 3.167137579307773e-05, "loss": 1.5066, "step": 20866 }, { "epoch": 0.7472917077013984, "grad_norm": 1.7102595567703247, "learning_rate": 3.1662907226378145e-05, "loss": 1.5464, "step": 20867 }, { "epoch": 0.7473275198309668, "grad_norm": 1.462852120399475, "learning_rate": 3.165443957906603e-05, "loss": 1.2122, "step": 20868 }, { "epoch": 0.7473633319605351, "grad_norm": 1.76582670211792, "learning_rate": 3.164597285125525e-05, "loss": 1.5579, "step": 20869 }, { "epoch": 0.7473991440901033, "grad_norm": 2.204495906829834, "learning_rate": 3.163750704305972e-05, "loss": 1.5112, "step": 20870 }, { "epoch": 0.7474349562196716, "grad_norm": 2.0119872093200684, "learning_rate": 3.162904215459336e-05, "loss": 1.4266, "step": 20871 }, { "epoch": 0.7474707683492399, "grad_norm": 1.4584416151046753, "learning_rate": 3.1620578185970075e-05, "loss": 1.4588, "step": 20872 }, { "epoch": 0.7475065804788081, "grad_norm": 1.7966676950454712, "learning_rate": 3.161211513730368e-05, "loss": 1.4396, "step": 20873 }, { "epoch": 0.7475423926083764, "grad_norm": 1.828666090965271, "learning_rate": 3.160365300870804e-05, "loss": 1.3623, "step": 20874 }, { "epoch": 0.7475782047379448, "grad_norm": 1.817426085472107, "learning_rate": 3.159519180029705e-05, "loss": 1.5559, "step": 20875 }, { "epoch": 0.747614016867513, "grad_norm": 1.6124011278152466, "learning_rate": 3.1586731512184545e-05, "loss": 1.2511, "step": 20876 }, { "epoch": 0.7476498289970813, "grad_norm": 1.5697824954986572, "learning_rate": 3.157827214448428e-05, "loss": 1.4974, "step": 20877 }, { "epoch": 0.7476856411266496, "grad_norm": 1.6202102899551392, "learning_rate": 3.1569813697310115e-05, "loss": 1.536, "step": 20878 }, { "epoch": 0.7477214532562179, "grad_norm": 1.5296554565429688, "learning_rate": 3.156135617077587e-05, "loss": 1.4601, "step": 20879 }, { "epoch": 0.7477572653857861, "grad_norm": 1.323926568031311, "learning_rate": 3.155289956499525e-05, "loss": 1.3919, "step": 20880 }, { "epoch": 0.7477930775153544, "grad_norm": 1.4089916944503784, "learning_rate": 3.15444438800821e-05, "loss": 1.5501, "step": 20881 }, { "epoch": 0.7478288896449228, "grad_norm": 1.9115785360336304, "learning_rate": 3.1535989116150146e-05, "loss": 1.6466, "step": 20882 }, { "epoch": 0.747864701774491, "grad_norm": 1.4998923540115356, "learning_rate": 3.1527535273313166e-05, "loss": 1.3259, "step": 20883 }, { "epoch": 0.7479005139040593, "grad_norm": 1.8716665506362915, "learning_rate": 3.151908235168486e-05, "loss": 1.6371, "step": 20884 }, { "epoch": 0.7479363260336276, "grad_norm": 1.64845871925354, "learning_rate": 3.151063035137896e-05, "loss": 1.4893, "step": 20885 }, { "epoch": 0.7479721381631959, "grad_norm": 1.3553320169448853, "learning_rate": 3.1502179272509216e-05, "loss": 1.3028, "step": 20886 }, { "epoch": 0.7480079502927641, "grad_norm": 1.9826897382736206, "learning_rate": 3.149372911518926e-05, "loss": 1.5283, "step": 20887 }, { "epoch": 0.7480437624223324, "grad_norm": 1.5831016302108765, "learning_rate": 3.1485279879532826e-05, "loss": 1.4323, "step": 20888 }, { "epoch": 0.7480795745519008, "grad_norm": 1.9086743593215942, "learning_rate": 3.147683156565355e-05, "loss": 1.4901, "step": 20889 }, { "epoch": 0.748115386681469, "grad_norm": 1.5491160154342651, "learning_rate": 3.146838417366517e-05, "loss": 1.7092, "step": 20890 }, { "epoch": 0.7481511988110373, "grad_norm": 1.6630737781524658, "learning_rate": 3.145993770368124e-05, "loss": 1.4201, "step": 20891 }, { "epoch": 0.7481870109406056, "grad_norm": 1.6699607372283936, "learning_rate": 3.1451492155815444e-05, "loss": 1.2344, "step": 20892 }, { "epoch": 0.7482228230701738, "grad_norm": 1.4572018384933472, "learning_rate": 3.1443047530181394e-05, "loss": 1.5246, "step": 20893 }, { "epoch": 0.7482586351997421, "grad_norm": 1.2610589265823364, "learning_rate": 3.143460382689274e-05, "loss": 1.3851, "step": 20894 }, { "epoch": 0.7482944473293104, "grad_norm": 1.5143929719924927, "learning_rate": 3.142616104606304e-05, "loss": 1.1789, "step": 20895 }, { "epoch": 0.7483302594588788, "grad_norm": 1.5444740056991577, "learning_rate": 3.141771918780584e-05, "loss": 1.2268, "step": 20896 }, { "epoch": 0.748366071588447, "grad_norm": 1.3086482286453247, "learning_rate": 3.140927825223482e-05, "loss": 1.2681, "step": 20897 }, { "epoch": 0.7484018837180153, "grad_norm": 1.645628809928894, "learning_rate": 3.140083823946346e-05, "loss": 1.5036, "step": 20898 }, { "epoch": 0.7484376958475836, "grad_norm": 1.3148621320724487, "learning_rate": 3.139239914960532e-05, "loss": 1.3868, "step": 20899 }, { "epoch": 0.7484735079771518, "grad_norm": 1.466416597366333, "learning_rate": 3.138396098277396e-05, "loss": 1.4579, "step": 20900 }, { "epoch": 0.7485093201067201, "grad_norm": 2.0066518783569336, "learning_rate": 3.1375523739082936e-05, "loss": 1.3685, "step": 20901 }, { "epoch": 0.7485451322362884, "grad_norm": 1.307276964187622, "learning_rate": 3.136708741864568e-05, "loss": 1.0352, "step": 20902 }, { "epoch": 0.7485809443658568, "grad_norm": 1.4867724180221558, "learning_rate": 3.135865202157574e-05, "loss": 1.5873, "step": 20903 }, { "epoch": 0.748616756495425, "grad_norm": 1.7414683103561401, "learning_rate": 3.135021754798663e-05, "loss": 1.5434, "step": 20904 }, { "epoch": 0.7486525686249933, "grad_norm": 1.3864867687225342, "learning_rate": 3.134178399799175e-05, "loss": 1.3304, "step": 20905 }, { "epoch": 0.7486883807545616, "grad_norm": 1.6754690408706665, "learning_rate": 3.1333351371704634e-05, "loss": 1.5759, "step": 20906 }, { "epoch": 0.7487241928841298, "grad_norm": 2.0780746936798096, "learning_rate": 3.132491966923864e-05, "loss": 1.8282, "step": 20907 }, { "epoch": 0.7487600050136981, "grad_norm": 1.4286201000213623, "learning_rate": 3.131648889070734e-05, "loss": 1.4423, "step": 20908 }, { "epoch": 0.7487958171432664, "grad_norm": 1.1952495574951172, "learning_rate": 3.130805903622405e-05, "loss": 1.5991, "step": 20909 }, { "epoch": 0.7488316292728348, "grad_norm": 1.5690759420394897, "learning_rate": 3.129963010590224e-05, "loss": 1.4049, "step": 20910 }, { "epoch": 0.748867441402403, "grad_norm": 1.718424916267395, "learning_rate": 3.1291202099855245e-05, "loss": 1.5049, "step": 20911 }, { "epoch": 0.7489032535319713, "grad_norm": 1.4387127161026, "learning_rate": 3.1282775018196554e-05, "loss": 1.6321, "step": 20912 }, { "epoch": 0.7489390656615396, "grad_norm": 1.6412798166275024, "learning_rate": 3.127434886103948e-05, "loss": 1.3564, "step": 20913 }, { "epoch": 0.7489748777911078, "grad_norm": 1.4113123416900635, "learning_rate": 3.1265923628497327e-05, "loss": 1.3528, "step": 20914 }, { "epoch": 0.7490106899206761, "grad_norm": 1.7293850183486938, "learning_rate": 3.125749932068359e-05, "loss": 1.3285, "step": 20915 }, { "epoch": 0.7490465020502444, "grad_norm": 1.4474704265594482, "learning_rate": 3.124907593771148e-05, "loss": 1.4824, "step": 20916 }, { "epoch": 0.7490823141798127, "grad_norm": 2.540874719619751, "learning_rate": 3.1240653479694415e-05, "loss": 1.2577, "step": 20917 }, { "epoch": 0.749118126309381, "grad_norm": 1.6497268676757812, "learning_rate": 3.123223194674559e-05, "loss": 1.3947, "step": 20918 }, { "epoch": 0.7491539384389493, "grad_norm": 1.7501670122146606, "learning_rate": 3.122381133897846e-05, "loss": 1.7379, "step": 20919 }, { "epoch": 0.7491897505685176, "grad_norm": 1.541893482208252, "learning_rate": 3.121539165650619e-05, "loss": 1.1808, "step": 20920 }, { "epoch": 0.7492255626980858, "grad_norm": 1.4933574199676514, "learning_rate": 3.120697289944213e-05, "loss": 1.4671, "step": 20921 }, { "epoch": 0.7492613748276541, "grad_norm": 1.7959959506988525, "learning_rate": 3.119855506789948e-05, "loss": 1.4678, "step": 20922 }, { "epoch": 0.7492971869572224, "grad_norm": 1.9064847230911255, "learning_rate": 3.1190138161991536e-05, "loss": 1.616, "step": 20923 }, { "epoch": 0.7493329990867907, "grad_norm": 1.8895035982131958, "learning_rate": 3.118172218183154e-05, "loss": 1.3401, "step": 20924 }, { "epoch": 0.749368811216359, "grad_norm": 3.042356252670288, "learning_rate": 3.117330712753265e-05, "loss": 1.5714, "step": 20925 }, { "epoch": 0.7494046233459273, "grad_norm": 1.5778942108154297, "learning_rate": 3.11648929992082e-05, "loss": 1.4569, "step": 20926 }, { "epoch": 0.7494404354754955, "grad_norm": 1.8818434476852417, "learning_rate": 3.115647979697128e-05, "loss": 1.3493, "step": 20927 }, { "epoch": 0.7494762476050638, "grad_norm": 1.2638598680496216, "learning_rate": 3.114806752093517e-05, "loss": 1.4576, "step": 20928 }, { "epoch": 0.7495120597346321, "grad_norm": 1.5679749250411987, "learning_rate": 3.113965617121291e-05, "loss": 1.2887, "step": 20929 }, { "epoch": 0.7495478718642004, "grad_norm": 1.635933518409729, "learning_rate": 3.1131245747917835e-05, "loss": 1.6161, "step": 20930 }, { "epoch": 0.7495836839937687, "grad_norm": 1.6987125873565674, "learning_rate": 3.1122836251163014e-05, "loss": 1.4761, "step": 20931 }, { "epoch": 0.749619496123337, "grad_norm": 1.466423511505127, "learning_rate": 3.111442768106155e-05, "loss": 1.5627, "step": 20932 }, { "epoch": 0.7496553082529053, "grad_norm": 1.3633636236190796, "learning_rate": 3.1106020037726615e-05, "loss": 1.2434, "step": 20933 }, { "epoch": 0.7496911203824735, "grad_norm": 1.5454039573669434, "learning_rate": 3.1097613321271304e-05, "loss": 1.3154, "step": 20934 }, { "epoch": 0.7497269325120418, "grad_norm": 1.5664490461349487, "learning_rate": 3.108920753180875e-05, "loss": 1.4282, "step": 20935 }, { "epoch": 0.7497627446416101, "grad_norm": 2.946145534515381, "learning_rate": 3.1080802669452e-05, "loss": 1.5775, "step": 20936 }, { "epoch": 0.7497985567711783, "grad_norm": 2.2433459758758545, "learning_rate": 3.107239873431416e-05, "loss": 1.4163, "step": 20937 }, { "epoch": 0.7498343689007467, "grad_norm": 1.6156102418899536, "learning_rate": 3.1063995726508296e-05, "loss": 1.1802, "step": 20938 }, { "epoch": 0.749870181030315, "grad_norm": 2.013106346130371, "learning_rate": 3.105559364614743e-05, "loss": 1.434, "step": 20939 }, { "epoch": 0.7499059931598833, "grad_norm": 2.1555824279785156, "learning_rate": 3.1047192493344624e-05, "loss": 1.3182, "step": 20940 }, { "epoch": 0.7499418052894515, "grad_norm": 1.372545599937439, "learning_rate": 3.103879226821289e-05, "loss": 1.2394, "step": 20941 }, { "epoch": 0.7499776174190198, "grad_norm": 2.159738302230835, "learning_rate": 3.1030392970865286e-05, "loss": 1.2531, "step": 20942 }, { "epoch": 0.7500134295485881, "grad_norm": 1.378603219985962, "learning_rate": 3.102199460141475e-05, "loss": 1.3648, "step": 20943 }, { "epoch": 0.7500492416781563, "grad_norm": 1.817529320716858, "learning_rate": 3.1013597159974304e-05, "loss": 1.4851, "step": 20944 }, { "epoch": 0.7500850538077247, "grad_norm": 1.4849520921707153, "learning_rate": 3.1005200646656915e-05, "loss": 1.3406, "step": 20945 }, { "epoch": 0.750120865937293, "grad_norm": 2.093026638031006, "learning_rate": 3.09968050615756e-05, "loss": 1.2892, "step": 20946 }, { "epoch": 0.7501566780668613, "grad_norm": 1.8699207305908203, "learning_rate": 3.0988410404843216e-05, "loss": 1.7263, "step": 20947 }, { "epoch": 0.7501924901964295, "grad_norm": 2.0237114429473877, "learning_rate": 3.0980016676572766e-05, "loss": 1.4728, "step": 20948 }, { "epoch": 0.7502283023259978, "grad_norm": 1.5000897645950317, "learning_rate": 3.097162387687719e-05, "loss": 1.3496, "step": 20949 }, { "epoch": 0.7502641144555661, "grad_norm": 2.2044296264648438, "learning_rate": 3.096323200586934e-05, "loss": 1.1754, "step": 20950 }, { "epoch": 0.7502999265851343, "grad_norm": 1.645936369895935, "learning_rate": 3.0954841063662145e-05, "loss": 1.5664, "step": 20951 }, { "epoch": 0.7503357387147027, "grad_norm": 1.730129599571228, "learning_rate": 3.094645105036851e-05, "loss": 1.2231, "step": 20952 }, { "epoch": 0.750371550844271, "grad_norm": 1.6695406436920166, "learning_rate": 3.093806196610134e-05, "loss": 1.5443, "step": 20953 }, { "epoch": 0.7504073629738393, "grad_norm": 1.7872570753097534, "learning_rate": 3.092967381097342e-05, "loss": 1.2933, "step": 20954 }, { "epoch": 0.7504431751034075, "grad_norm": 1.5018796920776367, "learning_rate": 3.092128658509765e-05, "loss": 1.4061, "step": 20955 }, { "epoch": 0.7504789872329758, "grad_norm": 2.464582681655884, "learning_rate": 3.09129002885869e-05, "loss": 1.5651, "step": 20956 }, { "epoch": 0.7505147993625441, "grad_norm": 1.7600558996200562, "learning_rate": 3.090451492155392e-05, "loss": 1.6592, "step": 20957 }, { "epoch": 0.7505506114921123, "grad_norm": 1.316369652748108, "learning_rate": 3.089613048411158e-05, "loss": 1.3414, "step": 20958 }, { "epoch": 0.7505864236216807, "grad_norm": 1.9160432815551758, "learning_rate": 3.088774697637265e-05, "loss": 1.3255, "step": 20959 }, { "epoch": 0.750622235751249, "grad_norm": 2.339635133743286, "learning_rate": 3.087936439844997e-05, "loss": 1.357, "step": 20960 }, { "epoch": 0.7506580478808172, "grad_norm": 1.4159473180770874, "learning_rate": 3.087098275045626e-05, "loss": 1.3368, "step": 20961 }, { "epoch": 0.7506938600103855, "grad_norm": 2.2639219760894775, "learning_rate": 3.08626020325043e-05, "loss": 1.565, "step": 20962 }, { "epoch": 0.7507296721399538, "grad_norm": 1.6115988492965698, "learning_rate": 3.0854222244706857e-05, "loss": 1.4387, "step": 20963 }, { "epoch": 0.750765484269522, "grad_norm": 1.6219232082366943, "learning_rate": 3.0845843387176686e-05, "loss": 1.5412, "step": 20964 }, { "epoch": 0.7508012963990903, "grad_norm": 1.478438377380371, "learning_rate": 3.083746546002646e-05, "loss": 1.3816, "step": 20965 }, { "epoch": 0.7508371085286587, "grad_norm": 1.358998417854309, "learning_rate": 3.082908846336891e-05, "loss": 1.3876, "step": 20966 }, { "epoch": 0.750872920658227, "grad_norm": 1.556822419166565, "learning_rate": 3.082071239731681e-05, "loss": 1.4421, "step": 20967 }, { "epoch": 0.7509087327877952, "grad_norm": 1.4174234867095947, "learning_rate": 3.0812337261982735e-05, "loss": 1.4759, "step": 20968 }, { "epoch": 0.7509445449173635, "grad_norm": 1.8507611751556396, "learning_rate": 3.080396305747942e-05, "loss": 1.6876, "step": 20969 }, { "epoch": 0.7509803570469318, "grad_norm": 1.3811239004135132, "learning_rate": 3.0795589783919543e-05, "loss": 1.3176, "step": 20970 }, { "epoch": 0.7510161691765, "grad_norm": 1.923411250114441, "learning_rate": 3.078721744141575e-05, "loss": 1.3883, "step": 20971 }, { "epoch": 0.7510519813060683, "grad_norm": 2.107851266860962, "learning_rate": 3.0778846030080644e-05, "loss": 1.5351, "step": 20972 }, { "epoch": 0.7510877934356367, "grad_norm": 1.7084307670593262, "learning_rate": 3.077047555002688e-05, "loss": 1.539, "step": 20973 }, { "epoch": 0.751123605565205, "grad_norm": 2.1994099617004395, "learning_rate": 3.0762106001367095e-05, "loss": 1.2937, "step": 20974 }, { "epoch": 0.7511594176947732, "grad_norm": 1.8254406452178955, "learning_rate": 3.075373738421383e-05, "loss": 1.2871, "step": 20975 }, { "epoch": 0.7511952298243415, "grad_norm": 1.9883519411087036, "learning_rate": 3.0745369698679715e-05, "loss": 1.3643, "step": 20976 }, { "epoch": 0.7512310419539098, "grad_norm": 1.6190694570541382, "learning_rate": 3.0737002944877314e-05, "loss": 1.2145, "step": 20977 }, { "epoch": 0.751266854083478, "grad_norm": 1.9418599605560303, "learning_rate": 3.072863712291922e-05, "loss": 1.3982, "step": 20978 }, { "epoch": 0.7513026662130463, "grad_norm": 1.3245214223861694, "learning_rate": 3.0720272232917934e-05, "loss": 1.4906, "step": 20979 }, { "epoch": 0.7513384783426147, "grad_norm": 1.2552196979522705, "learning_rate": 3.071190827498602e-05, "loss": 1.4326, "step": 20980 }, { "epoch": 0.751374290472183, "grad_norm": 1.699867844581604, "learning_rate": 3.070354524923601e-05, "loss": 1.4727, "step": 20981 }, { "epoch": 0.7514101026017512, "grad_norm": 1.6668874025344849, "learning_rate": 3.0695183155780435e-05, "loss": 1.3547, "step": 20982 }, { "epoch": 0.7514459147313195, "grad_norm": 1.845920443534851, "learning_rate": 3.068682199473175e-05, "loss": 1.4138, "step": 20983 }, { "epoch": 0.7514817268608878, "grad_norm": 1.6310741901397705, "learning_rate": 3.067846176620247e-05, "loss": 1.2511, "step": 20984 }, { "epoch": 0.751517538990456, "grad_norm": 1.9002145528793335, "learning_rate": 3.06701024703051e-05, "loss": 1.4841, "step": 20985 }, { "epoch": 0.7515533511200243, "grad_norm": 2.533083438873291, "learning_rate": 3.0661744107152025e-05, "loss": 1.6083, "step": 20986 }, { "epoch": 0.7515891632495927, "grad_norm": 1.722609519958496, "learning_rate": 3.0653386676855756e-05, "loss": 1.2962, "step": 20987 }, { "epoch": 0.751624975379161, "grad_norm": 2.650226593017578, "learning_rate": 3.064503017952871e-05, "loss": 1.357, "step": 20988 }, { "epoch": 0.7516607875087292, "grad_norm": 2.232440948486328, "learning_rate": 3.0636674615283364e-05, "loss": 1.5926, "step": 20989 }, { "epoch": 0.7516965996382975, "grad_norm": 1.7429866790771484, "learning_rate": 3.0628319984232056e-05, "loss": 1.547, "step": 20990 }, { "epoch": 0.7517324117678658, "grad_norm": 1.6939197778701782, "learning_rate": 3.061996628648721e-05, "loss": 1.1195, "step": 20991 }, { "epoch": 0.751768223897434, "grad_norm": 1.5635935068130493, "learning_rate": 3.0611613522161266e-05, "loss": 1.1881, "step": 20992 }, { "epoch": 0.7518040360270023, "grad_norm": 1.8957972526550293, "learning_rate": 3.0603261691366525e-05, "loss": 1.3517, "step": 20993 }, { "epoch": 0.7518398481565707, "grad_norm": 2.051981210708618, "learning_rate": 3.05949107942154e-05, "loss": 1.413, "step": 20994 }, { "epoch": 0.751875660286139, "grad_norm": 1.7447413206100464, "learning_rate": 3.0586560830820174e-05, "loss": 1.5783, "step": 20995 }, { "epoch": 0.7519114724157072, "grad_norm": 1.535361886024475, "learning_rate": 3.05782118012933e-05, "loss": 1.3961, "step": 20996 }, { "epoch": 0.7519472845452755, "grad_norm": 1.363828420639038, "learning_rate": 3.0569863705747004e-05, "loss": 1.5911, "step": 20997 }, { "epoch": 0.7519830966748438, "grad_norm": 3.876757860183716, "learning_rate": 3.0561516544293634e-05, "loss": 1.889, "step": 20998 }, { "epoch": 0.752018908804412, "grad_norm": 1.9156787395477295, "learning_rate": 3.0553170317045485e-05, "loss": 1.1729, "step": 20999 }, { "epoch": 0.7520547209339803, "grad_norm": 2.1314427852630615, "learning_rate": 3.054482502411489e-05, "loss": 1.4521, "step": 21000 }, { "epoch": 0.7520905330635487, "grad_norm": 1.6527806520462036, "learning_rate": 3.0536480665614075e-05, "loss": 1.6505, "step": 21001 }, { "epoch": 0.7521263451931169, "grad_norm": 1.80655837059021, "learning_rate": 3.052813724165525e-05, "loss": 1.2674, "step": 21002 }, { "epoch": 0.7521621573226852, "grad_norm": 2.413799285888672, "learning_rate": 3.051979475235078e-05, "loss": 1.1142, "step": 21003 }, { "epoch": 0.7521979694522535, "grad_norm": 1.5094581842422485, "learning_rate": 3.0511453197812834e-05, "loss": 1.3326, "step": 21004 }, { "epoch": 0.7522337815818217, "grad_norm": 1.4582942724227905, "learning_rate": 3.050311257815368e-05, "loss": 1.3724, "step": 21005 }, { "epoch": 0.75226959371139, "grad_norm": 1.700408935546875, "learning_rate": 3.0494772893485435e-05, "loss": 1.1878, "step": 21006 }, { "epoch": 0.7523054058409583, "grad_norm": 1.694864273071289, "learning_rate": 3.0486434143920428e-05, "loss": 1.2196, "step": 21007 }, { "epoch": 0.7523412179705267, "grad_norm": 1.9889030456542969, "learning_rate": 3.047809632957075e-05, "loss": 1.3167, "step": 21008 }, { "epoch": 0.7523770301000949, "grad_norm": 1.6860452890396118, "learning_rate": 3.0469759450548607e-05, "loss": 1.2681, "step": 21009 }, { "epoch": 0.7524128422296632, "grad_norm": 1.7383288145065308, "learning_rate": 3.0461423506966203e-05, "loss": 1.2188, "step": 21010 }, { "epoch": 0.7524486543592315, "grad_norm": 1.203225016593933, "learning_rate": 3.0453088498935612e-05, "loss": 1.459, "step": 21011 }, { "epoch": 0.7524844664887997, "grad_norm": 1.667195439338684, "learning_rate": 3.0444754426569032e-05, "loss": 1.4978, "step": 21012 }, { "epoch": 0.752520278618368, "grad_norm": 1.8023768663406372, "learning_rate": 3.04364212899785e-05, "loss": 1.3717, "step": 21013 }, { "epoch": 0.7525560907479363, "grad_norm": 1.4114989042282104, "learning_rate": 3.0428089089276257e-05, "loss": 1.6978, "step": 21014 }, { "epoch": 0.7525919028775047, "grad_norm": 1.4679601192474365, "learning_rate": 3.04197578245743e-05, "loss": 1.5715, "step": 21015 }, { "epoch": 0.7526277150070729, "grad_norm": 1.8693400621414185, "learning_rate": 3.041142749598479e-05, "loss": 1.6269, "step": 21016 }, { "epoch": 0.7526635271366412, "grad_norm": 2.4425885677337646, "learning_rate": 3.0403098103619687e-05, "loss": 1.4659, "step": 21017 }, { "epoch": 0.7526993392662095, "grad_norm": 1.5194597244262695, "learning_rate": 3.0394769647591194e-05, "loss": 1.5173, "step": 21018 }, { "epoch": 0.7527351513957777, "grad_norm": 1.4792680740356445, "learning_rate": 3.0386442128011282e-05, "loss": 1.0879, "step": 21019 }, { "epoch": 0.752770963525346, "grad_norm": 1.689097285270691, "learning_rate": 3.037811554499197e-05, "loss": 1.0078, "step": 21020 }, { "epoch": 0.7528067756549143, "grad_norm": 1.4809821844100952, "learning_rate": 3.0369789898645306e-05, "loss": 1.4036, "step": 21021 }, { "epoch": 0.7528425877844827, "grad_norm": 1.493944764137268, "learning_rate": 3.0361465189083305e-05, "loss": 1.4354, "step": 21022 }, { "epoch": 0.7528783999140509, "grad_norm": 1.486365795135498, "learning_rate": 3.0353141416417997e-05, "loss": 1.2398, "step": 21023 }, { "epoch": 0.7529142120436192, "grad_norm": 1.6679426431655884, "learning_rate": 3.034481858076127e-05, "loss": 1.3382, "step": 21024 }, { "epoch": 0.7529500241731875, "grad_norm": 1.6131479740142822, "learning_rate": 3.0336496682225214e-05, "loss": 1.4169, "step": 21025 }, { "epoch": 0.7529858363027557, "grad_norm": 1.4379775524139404, "learning_rate": 3.0328175720921715e-05, "loss": 1.2663, "step": 21026 }, { "epoch": 0.753021648432324, "grad_norm": 1.8165546655654907, "learning_rate": 3.0319855696962762e-05, "loss": 1.577, "step": 21027 }, { "epoch": 0.7530574605618923, "grad_norm": 1.724393367767334, "learning_rate": 3.0311536610460245e-05, "loss": 1.671, "step": 21028 }, { "epoch": 0.7530932726914606, "grad_norm": 1.8695555925369263, "learning_rate": 3.0303218461526116e-05, "loss": 1.7049, "step": 21029 }, { "epoch": 0.7531290848210289, "grad_norm": 2.045958995819092, "learning_rate": 3.02949012502723e-05, "loss": 1.5631, "step": 21030 }, { "epoch": 0.7531648969505972, "grad_norm": 1.6942603588104248, "learning_rate": 3.028658497681065e-05, "loss": 1.4666, "step": 21031 }, { "epoch": 0.7532007090801655, "grad_norm": 2.18617582321167, "learning_rate": 3.0278269641253075e-05, "loss": 1.3933, "step": 21032 }, { "epoch": 0.7532365212097337, "grad_norm": 2.1976447105407715, "learning_rate": 3.0269955243711457e-05, "loss": 1.314, "step": 21033 }, { "epoch": 0.753272333339302, "grad_norm": 1.413646936416626, "learning_rate": 3.0261641784297666e-05, "loss": 1.5067, "step": 21034 }, { "epoch": 0.7533081454688703, "grad_norm": 1.595048189163208, "learning_rate": 3.0253329263123497e-05, "loss": 1.3638, "step": 21035 }, { "epoch": 0.7533439575984386, "grad_norm": 2.5425102710723877, "learning_rate": 3.0245017680300813e-05, "loss": 1.1756, "step": 21036 }, { "epoch": 0.7533797697280069, "grad_norm": 1.6122390031814575, "learning_rate": 3.0236707035941482e-05, "loss": 1.591, "step": 21037 }, { "epoch": 0.7534155818575752, "grad_norm": 1.5871548652648926, "learning_rate": 3.0228397330157233e-05, "loss": 1.2486, "step": 21038 }, { "epoch": 0.7534513939871434, "grad_norm": 1.390411615371704, "learning_rate": 3.022008856305989e-05, "loss": 1.6519, "step": 21039 }, { "epoch": 0.7534872061167117, "grad_norm": 1.7453995943069458, "learning_rate": 3.0211780734761254e-05, "loss": 1.1648, "step": 21040 }, { "epoch": 0.75352301824628, "grad_norm": 1.4689477682113647, "learning_rate": 3.020347384537312e-05, "loss": 1.5223, "step": 21041 }, { "epoch": 0.7535588303758483, "grad_norm": 1.7363903522491455, "learning_rate": 3.019516789500718e-05, "loss": 1.3932, "step": 21042 }, { "epoch": 0.7535946425054166, "grad_norm": 1.9193975925445557, "learning_rate": 3.0186862883775214e-05, "loss": 1.2524, "step": 21043 }, { "epoch": 0.7536304546349849, "grad_norm": 1.6323972940444946, "learning_rate": 3.017855881178899e-05, "loss": 1.7168, "step": 21044 }, { "epoch": 0.7536662667645532, "grad_norm": 1.584980845451355, "learning_rate": 3.0170255679160163e-05, "loss": 1.1829, "step": 21045 }, { "epoch": 0.7537020788941214, "grad_norm": 1.800627589225769, "learning_rate": 3.0161953486000473e-05, "loss": 1.3106, "step": 21046 }, { "epoch": 0.7537378910236897, "grad_norm": 1.3021795749664307, "learning_rate": 3.0153652232421603e-05, "loss": 1.4727, "step": 21047 }, { "epoch": 0.753773703153258, "grad_norm": 1.9696956872940063, "learning_rate": 3.014535191853529e-05, "loss": 1.3064, "step": 21048 }, { "epoch": 0.7538095152828262, "grad_norm": 1.3597009181976318, "learning_rate": 3.0137052544453126e-05, "loss": 1.4837, "step": 21049 }, { "epoch": 0.7538453274123946, "grad_norm": 1.9204648733139038, "learning_rate": 3.0128754110286806e-05, "loss": 1.6705, "step": 21050 }, { "epoch": 0.7538811395419629, "grad_norm": 1.4181511402130127, "learning_rate": 3.012045661614796e-05, "loss": 1.4157, "step": 21051 }, { "epoch": 0.7539169516715312, "grad_norm": 1.776105523109436, "learning_rate": 3.0112160062148274e-05, "loss": 1.5494, "step": 21052 }, { "epoch": 0.7539527638010994, "grad_norm": 1.4429407119750977, "learning_rate": 3.01038644483993e-05, "loss": 1.3892, "step": 21053 }, { "epoch": 0.7539885759306677, "grad_norm": 1.9590340852737427, "learning_rate": 3.0095569775012665e-05, "loss": 1.2104, "step": 21054 }, { "epoch": 0.754024388060236, "grad_norm": 1.9207379817962646, "learning_rate": 3.0087276042099997e-05, "loss": 1.5865, "step": 21055 }, { "epoch": 0.7540602001898042, "grad_norm": 1.3791786432266235, "learning_rate": 3.007898324977282e-05, "loss": 1.1746, "step": 21056 }, { "epoch": 0.7540960123193726, "grad_norm": 1.5347812175750732, "learning_rate": 3.0070691398142726e-05, "loss": 1.4336, "step": 21057 }, { "epoch": 0.7541318244489409, "grad_norm": 1.8425312042236328, "learning_rate": 3.0062400487321286e-05, "loss": 1.704, "step": 21058 }, { "epoch": 0.7541676365785092, "grad_norm": 1.758529543876648, "learning_rate": 3.0054110517420052e-05, "loss": 1.6625, "step": 21059 }, { "epoch": 0.7542034487080774, "grad_norm": 1.4791284799575806, "learning_rate": 3.004582148855052e-05, "loss": 1.6378, "step": 21060 }, { "epoch": 0.7542392608376457, "grad_norm": 1.824438452720642, "learning_rate": 3.0037533400824226e-05, "loss": 1.3131, "step": 21061 }, { "epoch": 0.754275072967214, "grad_norm": 1.6703860759735107, "learning_rate": 3.0029246254352694e-05, "loss": 1.6793, "step": 21062 }, { "epoch": 0.7543108850967822, "grad_norm": 1.5747116804122925, "learning_rate": 3.002096004924737e-05, "loss": 1.4264, "step": 21063 }, { "epoch": 0.7543466972263506, "grad_norm": 1.7274689674377441, "learning_rate": 3.0012674785619766e-05, "loss": 1.4875, "step": 21064 }, { "epoch": 0.7543825093559189, "grad_norm": 1.6294996738433838, "learning_rate": 3.0004390463581345e-05, "loss": 1.2883, "step": 21065 }, { "epoch": 0.7544183214854872, "grad_norm": 1.555970549583435, "learning_rate": 2.9996107083243598e-05, "loss": 1.3513, "step": 21066 }, { "epoch": 0.7544541336150554, "grad_norm": 1.3730995655059814, "learning_rate": 2.9987824644717898e-05, "loss": 1.3771, "step": 21067 }, { "epoch": 0.7544899457446237, "grad_norm": 1.953616738319397, "learning_rate": 2.997954314811571e-05, "loss": 1.5383, "step": 21068 }, { "epoch": 0.754525757874192, "grad_norm": 1.8902431726455688, "learning_rate": 2.9971262593548443e-05, "loss": 1.5828, "step": 21069 }, { "epoch": 0.7545615700037602, "grad_norm": 1.774874210357666, "learning_rate": 2.996298298112754e-05, "loss": 1.4622, "step": 21070 }, { "epoch": 0.7545973821333286, "grad_norm": 1.683915615081787, "learning_rate": 2.9954704310964332e-05, "loss": 1.5168, "step": 21071 }, { "epoch": 0.7546331942628969, "grad_norm": 1.5608534812927246, "learning_rate": 2.9946426583170217e-05, "loss": 1.3258, "step": 21072 }, { "epoch": 0.7546690063924651, "grad_norm": 1.3177636861801147, "learning_rate": 2.9938149797856608e-05, "loss": 1.1626, "step": 21073 }, { "epoch": 0.7547048185220334, "grad_norm": 1.6210861206054688, "learning_rate": 2.992987395513479e-05, "loss": 1.6502, "step": 21074 }, { "epoch": 0.7547406306516017, "grad_norm": 1.682212471961975, "learning_rate": 2.9921599055116135e-05, "loss": 1.5752, "step": 21075 }, { "epoch": 0.75477644278117, "grad_norm": 1.6862162351608276, "learning_rate": 2.991332509791196e-05, "loss": 1.6626, "step": 21076 }, { "epoch": 0.7548122549107382, "grad_norm": 2.048794746398926, "learning_rate": 2.9905052083633632e-05, "loss": 1.5058, "step": 21077 }, { "epoch": 0.7548480670403066, "grad_norm": 1.955229640007019, "learning_rate": 2.9896780012392377e-05, "loss": 1.5886, "step": 21078 }, { "epoch": 0.7548838791698749, "grad_norm": 1.326578140258789, "learning_rate": 2.9888508884299516e-05, "loss": 1.5777, "step": 21079 }, { "epoch": 0.7549196912994431, "grad_norm": 1.9599168300628662, "learning_rate": 2.9880238699466367e-05, "loss": 1.6122, "step": 21080 }, { "epoch": 0.7549555034290114, "grad_norm": 1.659332036972046, "learning_rate": 2.9871969458004135e-05, "loss": 1.2969, "step": 21081 }, { "epoch": 0.7549913155585797, "grad_norm": 2.504672050476074, "learning_rate": 2.9863701160024083e-05, "loss": 1.5504, "step": 21082 }, { "epoch": 0.755027127688148, "grad_norm": 1.9664294719696045, "learning_rate": 2.9855433805637467e-05, "loss": 1.3975, "step": 21083 }, { "epoch": 0.7550629398177162, "grad_norm": 2.3380939960479736, "learning_rate": 2.9847167394955543e-05, "loss": 1.267, "step": 21084 }, { "epoch": 0.7550987519472846, "grad_norm": 1.6964002847671509, "learning_rate": 2.9838901928089456e-05, "loss": 1.1031, "step": 21085 }, { "epoch": 0.7551345640768529, "grad_norm": 1.679082989692688, "learning_rate": 2.983063740515044e-05, "loss": 1.5012, "step": 21086 }, { "epoch": 0.7551703762064211, "grad_norm": 1.3451464176177979, "learning_rate": 2.9822373826249693e-05, "loss": 1.3227, "step": 21087 }, { "epoch": 0.7552061883359894, "grad_norm": 1.67902672290802, "learning_rate": 2.9814111191498405e-05, "loss": 1.4921, "step": 21088 }, { "epoch": 0.7552420004655577, "grad_norm": 1.4258464574813843, "learning_rate": 2.9805849501007733e-05, "loss": 1.4449, "step": 21089 }, { "epoch": 0.7552778125951259, "grad_norm": 1.4557456970214844, "learning_rate": 2.979758875488874e-05, "loss": 1.0844, "step": 21090 }, { "epoch": 0.7553136247246942, "grad_norm": 1.5208238363265991, "learning_rate": 2.9789328953252694e-05, "loss": 1.5754, "step": 21091 }, { "epoch": 0.7553494368542626, "grad_norm": 1.7373424768447876, "learning_rate": 2.9781070096210627e-05, "loss": 1.6214, "step": 21092 }, { "epoch": 0.7553852489838309, "grad_norm": 1.8888261318206787, "learning_rate": 2.9772812183873733e-05, "loss": 1.4306, "step": 21093 }, { "epoch": 0.7554210611133991, "grad_norm": 1.2451953887939453, "learning_rate": 2.9764555216352997e-05, "loss": 1.1014, "step": 21094 }, { "epoch": 0.7554568732429674, "grad_norm": 2.3028512001037598, "learning_rate": 2.975629919375963e-05, "loss": 1.2671, "step": 21095 }, { "epoch": 0.7554926853725357, "grad_norm": 1.363605260848999, "learning_rate": 2.974804411620462e-05, "loss": 1.3705, "step": 21096 }, { "epoch": 0.7555284975021039, "grad_norm": 1.8250839710235596, "learning_rate": 2.973978998379906e-05, "loss": 1.4062, "step": 21097 }, { "epoch": 0.7555643096316722, "grad_norm": 1.7646355628967285, "learning_rate": 2.9731536796654026e-05, "loss": 1.4773, "step": 21098 }, { "epoch": 0.7556001217612406, "grad_norm": 2.1657111644744873, "learning_rate": 2.9723284554880493e-05, "loss": 1.1014, "step": 21099 }, { "epoch": 0.7556359338908089, "grad_norm": 1.4602758884429932, "learning_rate": 2.9715033258589543e-05, "loss": 1.4667, "step": 21100 }, { "epoch": 0.7556717460203771, "grad_norm": 1.4769114255905151, "learning_rate": 2.9706782907892104e-05, "loss": 1.1822, "step": 21101 }, { "epoch": 0.7557075581499454, "grad_norm": 1.8590151071548462, "learning_rate": 2.9698533502899294e-05, "loss": 1.5489, "step": 21102 }, { "epoch": 0.7557433702795137, "grad_norm": 1.3102487325668335, "learning_rate": 2.9690285043722e-05, "loss": 1.4329, "step": 21103 }, { "epoch": 0.7557791824090819, "grad_norm": 1.4828435182571411, "learning_rate": 2.9682037530471252e-05, "loss": 1.3056, "step": 21104 }, { "epoch": 0.7558149945386502, "grad_norm": 1.2443448305130005, "learning_rate": 2.967379096325793e-05, "loss": 1.3004, "step": 21105 }, { "epoch": 0.7558508066682186, "grad_norm": 1.5931856632232666, "learning_rate": 2.966554534219309e-05, "loss": 1.6132, "step": 21106 }, { "epoch": 0.7558866187977868, "grad_norm": 1.3592056035995483, "learning_rate": 2.965730066738762e-05, "loss": 1.4226, "step": 21107 }, { "epoch": 0.7559224309273551, "grad_norm": 1.710934042930603, "learning_rate": 2.964905693895237e-05, "loss": 1.6701, "step": 21108 }, { "epoch": 0.7559582430569234, "grad_norm": 1.9876086711883545, "learning_rate": 2.9640814156998374e-05, "loss": 1.5418, "step": 21109 }, { "epoch": 0.7559940551864917, "grad_norm": 1.912034511566162, "learning_rate": 2.9632572321636443e-05, "loss": 1.5359, "step": 21110 }, { "epoch": 0.7560298673160599, "grad_norm": 1.493776798248291, "learning_rate": 2.9624331432977515e-05, "loss": 1.5861, "step": 21111 }, { "epoch": 0.7560656794456282, "grad_norm": 2.0025715827941895, "learning_rate": 2.9616091491132357e-05, "loss": 1.3727, "step": 21112 }, { "epoch": 0.7561014915751966, "grad_norm": 1.6120564937591553, "learning_rate": 2.9607852496211962e-05, "loss": 1.554, "step": 21113 }, { "epoch": 0.7561373037047648, "grad_norm": 1.4082764387130737, "learning_rate": 2.9599614448327084e-05, "loss": 1.3681, "step": 21114 }, { "epoch": 0.7561731158343331, "grad_norm": 1.5112791061401367, "learning_rate": 2.9591377347588623e-05, "loss": 1.4086, "step": 21115 }, { "epoch": 0.7562089279639014, "grad_norm": 1.5789004564285278, "learning_rate": 2.958314119410732e-05, "loss": 1.4716, "step": 21116 }, { "epoch": 0.7562447400934696, "grad_norm": 1.2509442567825317, "learning_rate": 2.9574905987994016e-05, "loss": 1.6457, "step": 21117 }, { "epoch": 0.7562805522230379, "grad_norm": 2.162203311920166, "learning_rate": 2.9566671729359552e-05, "loss": 1.665, "step": 21118 }, { "epoch": 0.7563163643526062, "grad_norm": 1.7488106489181519, "learning_rate": 2.9558438418314626e-05, "loss": 1.1802, "step": 21119 }, { "epoch": 0.7563521764821746, "grad_norm": 1.4477161169052124, "learning_rate": 2.9550206054970063e-05, "loss": 1.3837, "step": 21120 }, { "epoch": 0.7563879886117428, "grad_norm": 1.7796725034713745, "learning_rate": 2.9541974639436588e-05, "loss": 1.5306, "step": 21121 }, { "epoch": 0.7564238007413111, "grad_norm": 1.4333518743515015, "learning_rate": 2.9533744171824996e-05, "loss": 1.5737, "step": 21122 }, { "epoch": 0.7564596128708794, "grad_norm": 1.4880192279815674, "learning_rate": 2.9525514652245922e-05, "loss": 1.3167, "step": 21123 }, { "epoch": 0.7564954250004476, "grad_norm": 2.2075390815734863, "learning_rate": 2.9517286080810204e-05, "loss": 1.6719, "step": 21124 }, { "epoch": 0.7565312371300159, "grad_norm": 1.4467953443527222, "learning_rate": 2.950905845762849e-05, "loss": 1.5902, "step": 21125 }, { "epoch": 0.7565670492595842, "grad_norm": 1.8242559432983398, "learning_rate": 2.9500831782811433e-05, "loss": 1.417, "step": 21126 }, { "epoch": 0.7566028613891526, "grad_norm": 1.713553547859192, "learning_rate": 2.949260605646974e-05, "loss": 1.6277, "step": 21127 }, { "epoch": 0.7566386735187208, "grad_norm": 2.017822027206421, "learning_rate": 2.948438127871409e-05, "loss": 1.0925, "step": 21128 }, { "epoch": 0.7566744856482891, "grad_norm": 1.8438435792922974, "learning_rate": 2.947615744965516e-05, "loss": 1.3623, "step": 21129 }, { "epoch": 0.7567102977778574, "grad_norm": 1.515105128288269, "learning_rate": 2.9467934569403542e-05, "loss": 1.3707, "step": 21130 }, { "epoch": 0.7567461099074256, "grad_norm": 1.3222929239273071, "learning_rate": 2.945971263806987e-05, "loss": 1.3478, "step": 21131 }, { "epoch": 0.7567819220369939, "grad_norm": 1.3271212577819824, "learning_rate": 2.9451491655764816e-05, "loss": 1.2689, "step": 21132 }, { "epoch": 0.7568177341665622, "grad_norm": 1.8101156949996948, "learning_rate": 2.94432716225989e-05, "loss": 1.4378, "step": 21133 }, { "epoch": 0.7568535462961306, "grad_norm": 1.6955105066299438, "learning_rate": 2.943505253868276e-05, "loss": 1.4796, "step": 21134 }, { "epoch": 0.7568893584256988, "grad_norm": 1.475974440574646, "learning_rate": 2.942683440412697e-05, "loss": 1.3474, "step": 21135 }, { "epoch": 0.7569251705552671, "grad_norm": 1.774215579032898, "learning_rate": 2.941861721904211e-05, "loss": 1.1609, "step": 21136 }, { "epoch": 0.7569609826848354, "grad_norm": 1.5521552562713623, "learning_rate": 2.941040098353869e-05, "loss": 1.486, "step": 21137 }, { "epoch": 0.7569967948144036, "grad_norm": 1.7704689502716064, "learning_rate": 2.940218569772726e-05, "loss": 1.498, "step": 21138 }, { "epoch": 0.7570326069439719, "grad_norm": 2.514124870300293, "learning_rate": 2.9393971361718363e-05, "loss": 1.3497, "step": 21139 }, { "epoch": 0.7570684190735402, "grad_norm": 1.5722203254699707, "learning_rate": 2.9385757975622542e-05, "loss": 1.5558, "step": 21140 }, { "epoch": 0.7571042312031085, "grad_norm": 1.9936293363571167, "learning_rate": 2.937754553955022e-05, "loss": 1.5372, "step": 21141 }, { "epoch": 0.7571400433326768, "grad_norm": 1.4166237115859985, "learning_rate": 2.936933405361194e-05, "loss": 1.1757, "step": 21142 }, { "epoch": 0.7571758554622451, "grad_norm": 1.6292997598648071, "learning_rate": 2.936112351791819e-05, "loss": 1.3799, "step": 21143 }, { "epoch": 0.7572116675918134, "grad_norm": 2.5143439769744873, "learning_rate": 2.9352913932579362e-05, "loss": 1.3905, "step": 21144 }, { "epoch": 0.7572474797213816, "grad_norm": 1.4618525505065918, "learning_rate": 2.9344705297705966e-05, "loss": 1.3163, "step": 21145 }, { "epoch": 0.7572832918509499, "grad_norm": 1.6585017442703247, "learning_rate": 2.933649761340841e-05, "loss": 1.3903, "step": 21146 }, { "epoch": 0.7573191039805182, "grad_norm": 1.43913733959198, "learning_rate": 2.932829087979716e-05, "loss": 1.3842, "step": 21147 }, { "epoch": 0.7573549161100865, "grad_norm": 1.726843237876892, "learning_rate": 2.9320085096982575e-05, "loss": 1.6791, "step": 21148 }, { "epoch": 0.7573907282396548, "grad_norm": 1.5694811344146729, "learning_rate": 2.9311880265075066e-05, "loss": 1.3672, "step": 21149 }, { "epoch": 0.7574265403692231, "grad_norm": 1.3155571222305298, "learning_rate": 2.9303676384185064e-05, "loss": 1.2628, "step": 21150 }, { "epoch": 0.7574623524987913, "grad_norm": 1.3629982471466064, "learning_rate": 2.9295473454422863e-05, "loss": 1.1417, "step": 21151 }, { "epoch": 0.7574981646283596, "grad_norm": 1.683783769607544, "learning_rate": 2.928727147589887e-05, "loss": 1.7473, "step": 21152 }, { "epoch": 0.7575339767579279, "grad_norm": 1.4711257219314575, "learning_rate": 2.9279070448723432e-05, "loss": 1.245, "step": 21153 }, { "epoch": 0.7575697888874962, "grad_norm": 1.780172348022461, "learning_rate": 2.927087037300691e-05, "loss": 1.3952, "step": 21154 }, { "epoch": 0.7576056010170645, "grad_norm": 1.5945377349853516, "learning_rate": 2.926267124885955e-05, "loss": 1.3441, "step": 21155 }, { "epoch": 0.7576414131466328, "grad_norm": 2.19747257232666, "learning_rate": 2.9254473076391708e-05, "loss": 1.6498, "step": 21156 }, { "epoch": 0.7576772252762011, "grad_norm": 1.6467136144638062, "learning_rate": 2.924627585571368e-05, "loss": 1.4511, "step": 21157 }, { "epoch": 0.7577130374057693, "grad_norm": 1.4846833944320679, "learning_rate": 2.9238079586935773e-05, "loss": 1.4392, "step": 21158 }, { "epoch": 0.7577488495353376, "grad_norm": 1.7187808752059937, "learning_rate": 2.9229884270168195e-05, "loss": 1.3017, "step": 21159 }, { "epoch": 0.7577846616649059, "grad_norm": 1.69410240650177, "learning_rate": 2.9221689905521245e-05, "loss": 1.4055, "step": 21160 }, { "epoch": 0.7578204737944741, "grad_norm": 1.315220832824707, "learning_rate": 2.9213496493105187e-05, "loss": 1.1048, "step": 21161 }, { "epoch": 0.7578562859240425, "grad_norm": 1.9386459589004517, "learning_rate": 2.920530403303019e-05, "loss": 1.5724, "step": 21162 }, { "epoch": 0.7578920980536108, "grad_norm": 1.3954391479492188, "learning_rate": 2.9197112525406522e-05, "loss": 1.0904, "step": 21163 }, { "epoch": 0.7579279101831791, "grad_norm": 1.6443402767181396, "learning_rate": 2.918892197034436e-05, "loss": 1.4831, "step": 21164 }, { "epoch": 0.7579637223127473, "grad_norm": 1.8026351928710938, "learning_rate": 2.9180732367953956e-05, "loss": 1.7182, "step": 21165 }, { "epoch": 0.7579995344423156, "grad_norm": 1.4861021041870117, "learning_rate": 2.9172543718345413e-05, "loss": 1.3764, "step": 21166 }, { "epoch": 0.7580353465718839, "grad_norm": 1.567413568496704, "learning_rate": 2.9164356021628923e-05, "loss": 1.3016, "step": 21167 }, { "epoch": 0.7580711587014521, "grad_norm": 1.7095822095870972, "learning_rate": 2.915616927791469e-05, "loss": 1.23, "step": 21168 }, { "epoch": 0.7581069708310205, "grad_norm": 1.373910903930664, "learning_rate": 2.9147983487312793e-05, "loss": 1.5581, "step": 21169 }, { "epoch": 0.7581427829605888, "grad_norm": 1.7896029949188232, "learning_rate": 2.913979864993338e-05, "loss": 1.3731, "step": 21170 }, { "epoch": 0.7581785950901571, "grad_norm": 1.2884318828582764, "learning_rate": 2.9131614765886573e-05, "loss": 1.4324, "step": 21171 }, { "epoch": 0.7582144072197253, "grad_norm": 1.8694905042648315, "learning_rate": 2.912343183528251e-05, "loss": 1.6948, "step": 21172 }, { "epoch": 0.7582502193492936, "grad_norm": 2.111467123031616, "learning_rate": 2.9115249858231207e-05, "loss": 1.6019, "step": 21173 }, { "epoch": 0.7582860314788619, "grad_norm": 1.7151230573654175, "learning_rate": 2.91070688348428e-05, "loss": 1.6059, "step": 21174 }, { "epoch": 0.7583218436084301, "grad_norm": 1.535046935081482, "learning_rate": 2.9098888765227316e-05, "loss": 1.5526, "step": 21175 }, { "epoch": 0.7583576557379985, "grad_norm": 1.7479262351989746, "learning_rate": 2.9090709649494873e-05, "loss": 1.4793, "step": 21176 }, { "epoch": 0.7583934678675668, "grad_norm": 1.8973602056503296, "learning_rate": 2.908253148775546e-05, "loss": 1.8296, "step": 21177 }, { "epoch": 0.758429279997135, "grad_norm": 1.2485648393630981, "learning_rate": 2.9074354280119042e-05, "loss": 1.2522, "step": 21178 }, { "epoch": 0.7584650921267033, "grad_norm": 1.4664356708526611, "learning_rate": 2.9066178026695767e-05, "loss": 1.2564, "step": 21179 }, { "epoch": 0.7585009042562716, "grad_norm": 1.1946889162063599, "learning_rate": 2.9058002727595546e-05, "loss": 1.536, "step": 21180 }, { "epoch": 0.7585367163858399, "grad_norm": 1.3045967817306519, "learning_rate": 2.904982838292838e-05, "loss": 1.2439, "step": 21181 }, { "epoch": 0.7585725285154081, "grad_norm": 1.5080360174179077, "learning_rate": 2.9041654992804256e-05, "loss": 1.6046, "step": 21182 }, { "epoch": 0.7586083406449765, "grad_norm": 1.595199704170227, "learning_rate": 2.9033482557333158e-05, "loss": 1.3323, "step": 21183 }, { "epoch": 0.7586441527745448, "grad_norm": 1.783447265625, "learning_rate": 2.9025311076624994e-05, "loss": 1.5985, "step": 21184 }, { "epoch": 0.758679964904113, "grad_norm": 1.9217463731765747, "learning_rate": 2.9017140550789713e-05, "loss": 1.1523, "step": 21185 }, { "epoch": 0.7587157770336813, "grad_norm": 1.8757216930389404, "learning_rate": 2.9008970979937276e-05, "loss": 1.3591, "step": 21186 }, { "epoch": 0.7587515891632496, "grad_norm": 1.840748906135559, "learning_rate": 2.9000802364177527e-05, "loss": 1.3812, "step": 21187 }, { "epoch": 0.7587874012928179, "grad_norm": 1.5982416868209839, "learning_rate": 2.8992634703620437e-05, "loss": 1.5119, "step": 21188 }, { "epoch": 0.7588232134223861, "grad_norm": 1.6878429651260376, "learning_rate": 2.8984467998375786e-05, "loss": 1.0781, "step": 21189 }, { "epoch": 0.7588590255519545, "grad_norm": 1.7874064445495605, "learning_rate": 2.8976302248553576e-05, "loss": 1.2698, "step": 21190 }, { "epoch": 0.7588948376815228, "grad_norm": 1.8459925651550293, "learning_rate": 2.896813745426359e-05, "loss": 1.1503, "step": 21191 }, { "epoch": 0.758930649811091, "grad_norm": 1.5828872919082642, "learning_rate": 2.8959973615615675e-05, "loss": 1.2534, "step": 21192 }, { "epoch": 0.7589664619406593, "grad_norm": 2.555840492248535, "learning_rate": 2.8951810732719685e-05, "loss": 1.5415, "step": 21193 }, { "epoch": 0.7590022740702276, "grad_norm": 2.2074623107910156, "learning_rate": 2.8943648805685464e-05, "loss": 1.4024, "step": 21194 }, { "epoch": 0.7590380861997958, "grad_norm": 2.5466811656951904, "learning_rate": 2.893548783462279e-05, "loss": 1.4226, "step": 21195 }, { "epoch": 0.7590738983293641, "grad_norm": 1.3953499794006348, "learning_rate": 2.8927327819641403e-05, "loss": 1.4408, "step": 21196 }, { "epoch": 0.7591097104589324, "grad_norm": 1.9189091920852661, "learning_rate": 2.8919168760851202e-05, "loss": 1.2927, "step": 21197 }, { "epoch": 0.7591455225885008, "grad_norm": 1.6638646125793457, "learning_rate": 2.891101065836187e-05, "loss": 1.5423, "step": 21198 }, { "epoch": 0.759181334718069, "grad_norm": 1.5958912372589111, "learning_rate": 2.8902853512283225e-05, "loss": 1.3269, "step": 21199 }, { "epoch": 0.7592171468476373, "grad_norm": 2.3823351860046387, "learning_rate": 2.8894697322724908e-05, "loss": 1.2585, "step": 21200 }, { "epoch": 0.7592529589772056, "grad_norm": 1.484283208847046, "learning_rate": 2.8886542089796785e-05, "loss": 1.3847, "step": 21201 }, { "epoch": 0.7592887711067738, "grad_norm": 1.6040844917297363, "learning_rate": 2.8878387813608477e-05, "loss": 1.3717, "step": 21202 }, { "epoch": 0.7593245832363421, "grad_norm": 1.6195263862609863, "learning_rate": 2.8870234494269756e-05, "loss": 1.4155, "step": 21203 }, { "epoch": 0.7593603953659104, "grad_norm": 1.518485188484192, "learning_rate": 2.8862082131890243e-05, "loss": 1.3777, "step": 21204 }, { "epoch": 0.7593962074954788, "grad_norm": 2.1455929279327393, "learning_rate": 2.885393072657966e-05, "loss": 1.7861, "step": 21205 }, { "epoch": 0.759432019625047, "grad_norm": 1.6914993524551392, "learning_rate": 2.8845780278447688e-05, "loss": 1.2118, "step": 21206 }, { "epoch": 0.7594678317546153, "grad_norm": 1.3727785348892212, "learning_rate": 2.8837630787603908e-05, "loss": 1.2499, "step": 21207 }, { "epoch": 0.7595036438841836, "grad_norm": 1.734089732170105, "learning_rate": 2.882948225415807e-05, "loss": 1.5871, "step": 21208 }, { "epoch": 0.7595394560137518, "grad_norm": 1.2891333103179932, "learning_rate": 2.8821334678219712e-05, "loss": 1.2715, "step": 21209 }, { "epoch": 0.7595752681433201, "grad_norm": 2.27925443649292, "learning_rate": 2.8813188059898512e-05, "loss": 1.3765, "step": 21210 }, { "epoch": 0.7596110802728884, "grad_norm": 1.3303371667861938, "learning_rate": 2.8805042399303984e-05, "loss": 1.5651, "step": 21211 }, { "epoch": 0.7596468924024568, "grad_norm": 2.1135172843933105, "learning_rate": 2.8796897696545832e-05, "loss": 1.6908, "step": 21212 }, { "epoch": 0.759682704532025, "grad_norm": 1.5996071100234985, "learning_rate": 2.878875395173358e-05, "loss": 1.4533, "step": 21213 }, { "epoch": 0.7597185166615933, "grad_norm": 1.5009126663208008, "learning_rate": 2.8780611164976767e-05, "loss": 1.2599, "step": 21214 }, { "epoch": 0.7597543287911616, "grad_norm": 1.407975196838379, "learning_rate": 2.8772469336384954e-05, "loss": 1.2601, "step": 21215 }, { "epoch": 0.7597901409207298, "grad_norm": 1.3562133312225342, "learning_rate": 2.876432846606769e-05, "loss": 1.3456, "step": 21216 }, { "epoch": 0.7598259530502981, "grad_norm": 1.6135796308517456, "learning_rate": 2.8756188554134522e-05, "loss": 1.2708, "step": 21217 }, { "epoch": 0.7598617651798664, "grad_norm": 1.496204137802124, "learning_rate": 2.8748049600694893e-05, "loss": 1.3726, "step": 21218 }, { "epoch": 0.7598975773094347, "grad_norm": 2.0687203407287598, "learning_rate": 2.8739911605858394e-05, "loss": 1.3556, "step": 21219 }, { "epoch": 0.759933389439003, "grad_norm": 1.4460134506225586, "learning_rate": 2.873177456973445e-05, "loss": 1.1964, "step": 21220 }, { "epoch": 0.7599692015685713, "grad_norm": 1.672918677330017, "learning_rate": 2.872363849243257e-05, "loss": 1.3602, "step": 21221 }, { "epoch": 0.7600050136981396, "grad_norm": 1.2204346656799316, "learning_rate": 2.871550337406217e-05, "loss": 1.1142, "step": 21222 }, { "epoch": 0.7600408258277078, "grad_norm": 1.909233808517456, "learning_rate": 2.8707369214732716e-05, "loss": 1.4387, "step": 21223 }, { "epoch": 0.7600766379572761, "grad_norm": 1.3852407932281494, "learning_rate": 2.8699236014553686e-05, "loss": 1.5051, "step": 21224 }, { "epoch": 0.7601124500868444, "grad_norm": 1.9627448320388794, "learning_rate": 2.869110377363443e-05, "loss": 1.5291, "step": 21225 }, { "epoch": 0.7601482622164127, "grad_norm": 1.5735957622528076, "learning_rate": 2.868297249208438e-05, "loss": 1.5355, "step": 21226 }, { "epoch": 0.760184074345981, "grad_norm": 1.7588822841644287, "learning_rate": 2.867484217001296e-05, "loss": 1.5529, "step": 21227 }, { "epoch": 0.7602198864755493, "grad_norm": 1.5545251369476318, "learning_rate": 2.866671280752956e-05, "loss": 1.4601, "step": 21228 }, { "epoch": 0.7602556986051175, "grad_norm": 1.5367778539657593, "learning_rate": 2.8658584404743493e-05, "loss": 1.306, "step": 21229 }, { "epoch": 0.7602915107346858, "grad_norm": 1.8365488052368164, "learning_rate": 2.865045696176415e-05, "loss": 1.3618, "step": 21230 }, { "epoch": 0.7603273228642541, "grad_norm": 1.5795180797576904, "learning_rate": 2.8642330478700908e-05, "loss": 1.201, "step": 21231 }, { "epoch": 0.7603631349938224, "grad_norm": 2.363741397857666, "learning_rate": 2.8634204955663024e-05, "loss": 1.4861, "step": 21232 }, { "epoch": 0.7603989471233907, "grad_norm": 1.6575523614883423, "learning_rate": 2.862608039275987e-05, "loss": 1.3186, "step": 21233 }, { "epoch": 0.760434759252959, "grad_norm": 1.3435629606246948, "learning_rate": 2.861795679010073e-05, "loss": 1.7503, "step": 21234 }, { "epoch": 0.7604705713825273, "grad_norm": 1.258905291557312, "learning_rate": 2.8609834147794945e-05, "loss": 1.1639, "step": 21235 }, { "epoch": 0.7605063835120955, "grad_norm": 2.6588869094848633, "learning_rate": 2.8601712465951713e-05, "loss": 1.2825, "step": 21236 }, { "epoch": 0.7605421956416638, "grad_norm": 1.9487318992614746, "learning_rate": 2.8593591744680348e-05, "loss": 1.6677, "step": 21237 }, { "epoch": 0.7605780077712321, "grad_norm": 1.674471139907837, "learning_rate": 2.858547198409013e-05, "loss": 1.2264, "step": 21238 }, { "epoch": 0.7606138199008003, "grad_norm": 1.990461826324463, "learning_rate": 2.8577353184290236e-05, "loss": 1.341, "step": 21239 }, { "epoch": 0.7606496320303687, "grad_norm": 2.012136220932007, "learning_rate": 2.8569235345389922e-05, "loss": 1.583, "step": 21240 }, { "epoch": 0.760685444159937, "grad_norm": 1.3574470281600952, "learning_rate": 2.8561118467498415e-05, "loss": 1.6975, "step": 21241 }, { "epoch": 0.7607212562895053, "grad_norm": 1.79666006565094, "learning_rate": 2.855300255072494e-05, "loss": 1.5932, "step": 21242 }, { "epoch": 0.7607570684190735, "grad_norm": 1.7653344869613647, "learning_rate": 2.8544887595178616e-05, "loss": 1.4088, "step": 21243 }, { "epoch": 0.7607928805486418, "grad_norm": 1.3888636827468872, "learning_rate": 2.853677360096867e-05, "loss": 1.5872, "step": 21244 }, { "epoch": 0.7608286926782101, "grad_norm": 1.8041224479675293, "learning_rate": 2.8528660568204247e-05, "loss": 1.7292, "step": 21245 }, { "epoch": 0.7608645048077783, "grad_norm": 1.383401870727539, "learning_rate": 2.8520548496994536e-05, "loss": 1.2626, "step": 21246 }, { "epoch": 0.7609003169373467, "grad_norm": 1.8830630779266357, "learning_rate": 2.851243738744862e-05, "loss": 1.5501, "step": 21247 }, { "epoch": 0.760936129066915, "grad_norm": 2.0602715015411377, "learning_rate": 2.8504327239675645e-05, "loss": 1.3759, "step": 21248 }, { "epoch": 0.7609719411964833, "grad_norm": 1.9153468608856201, "learning_rate": 2.849621805378474e-05, "loss": 1.3334, "step": 21249 }, { "epoch": 0.7610077533260515, "grad_norm": 1.9879709482192993, "learning_rate": 2.848810982988497e-05, "loss": 1.5984, "step": 21250 }, { "epoch": 0.7610435654556198, "grad_norm": 1.8865492343902588, "learning_rate": 2.848000256808544e-05, "loss": 1.3962, "step": 21251 }, { "epoch": 0.7610793775851881, "grad_norm": 2.2912323474884033, "learning_rate": 2.8471896268495214e-05, "loss": 1.5664, "step": 21252 }, { "epoch": 0.7611151897147563, "grad_norm": 1.8442264795303345, "learning_rate": 2.84637909312234e-05, "loss": 1.3058, "step": 21253 }, { "epoch": 0.7611510018443247, "grad_norm": 2.1874215602874756, "learning_rate": 2.845568655637896e-05, "loss": 1.6448, "step": 21254 }, { "epoch": 0.761186813973893, "grad_norm": 1.9503223896026611, "learning_rate": 2.844758314407098e-05, "loss": 1.1485, "step": 21255 }, { "epoch": 0.7612226261034613, "grad_norm": 1.5357619524002075, "learning_rate": 2.8439480694408506e-05, "loss": 1.5472, "step": 21256 }, { "epoch": 0.7612584382330295, "grad_norm": 1.703912377357483, "learning_rate": 2.8431379207500476e-05, "loss": 1.6161, "step": 21257 }, { "epoch": 0.7612942503625978, "grad_norm": 2.0555579662323, "learning_rate": 2.8423278683455922e-05, "loss": 1.7808, "step": 21258 }, { "epoch": 0.7613300624921661, "grad_norm": 1.4263310432434082, "learning_rate": 2.8415179122383828e-05, "loss": 1.0745, "step": 21259 }, { "epoch": 0.7613658746217343, "grad_norm": 2.0803184509277344, "learning_rate": 2.840708052439319e-05, "loss": 1.5224, "step": 21260 }, { "epoch": 0.7614016867513027, "grad_norm": 1.6625678539276123, "learning_rate": 2.8398982889592908e-05, "loss": 1.2313, "step": 21261 }, { "epoch": 0.761437498880871, "grad_norm": 1.6900227069854736, "learning_rate": 2.839088621809195e-05, "loss": 1.4897, "step": 21262 }, { "epoch": 0.7614733110104392, "grad_norm": 1.6262860298156738, "learning_rate": 2.8382790509999257e-05, "loss": 1.4142, "step": 21263 }, { "epoch": 0.7615091231400075, "grad_norm": 1.7828209400177002, "learning_rate": 2.8374695765423753e-05, "loss": 1.7341, "step": 21264 }, { "epoch": 0.7615449352695758, "grad_norm": 2.0078964233398438, "learning_rate": 2.8366601984474305e-05, "loss": 1.5635, "step": 21265 }, { "epoch": 0.761580747399144, "grad_norm": 2.1161463260650635, "learning_rate": 2.835850916725983e-05, "loss": 1.4108, "step": 21266 }, { "epoch": 0.7616165595287123, "grad_norm": 1.6242632865905762, "learning_rate": 2.8350417313889233e-05, "loss": 1.0424, "step": 21267 }, { "epoch": 0.7616523716582807, "grad_norm": 1.9338126182556152, "learning_rate": 2.8342326424471323e-05, "loss": 1.4046, "step": 21268 }, { "epoch": 0.761688183787849, "grad_norm": 1.9795308113098145, "learning_rate": 2.8334236499114963e-05, "loss": 1.5743, "step": 21269 }, { "epoch": 0.7617239959174172, "grad_norm": 1.2497432231903076, "learning_rate": 2.8326147537929027e-05, "loss": 1.5952, "step": 21270 }, { "epoch": 0.7617598080469855, "grad_norm": 1.5189738273620605, "learning_rate": 2.8318059541022346e-05, "loss": 1.6455, "step": 21271 }, { "epoch": 0.7617956201765538, "grad_norm": 1.9544116258621216, "learning_rate": 2.830997250850368e-05, "loss": 1.438, "step": 21272 }, { "epoch": 0.761831432306122, "grad_norm": 1.6780002117156982, "learning_rate": 2.8301886440481862e-05, "loss": 1.2535, "step": 21273 }, { "epoch": 0.7618672444356903, "grad_norm": 2.1269612312316895, "learning_rate": 2.8293801337065705e-05, "loss": 1.2494, "step": 21274 }, { "epoch": 0.7619030565652587, "grad_norm": 2.0308775901794434, "learning_rate": 2.8285717198363924e-05, "loss": 1.6469, "step": 21275 }, { "epoch": 0.761938868694827, "grad_norm": 2.287074565887451, "learning_rate": 2.8277634024485322e-05, "loss": 1.2689, "step": 21276 }, { "epoch": 0.7619746808243952, "grad_norm": 1.8691420555114746, "learning_rate": 2.826955181553863e-05, "loss": 1.2943, "step": 21277 }, { "epoch": 0.7620104929539635, "grad_norm": 2.088134765625, "learning_rate": 2.826147057163263e-05, "loss": 1.2476, "step": 21278 }, { "epoch": 0.7620463050835318, "grad_norm": 2.5623204708099365, "learning_rate": 2.8253390292875982e-05, "loss": 1.7233, "step": 21279 }, { "epoch": 0.7620821172131, "grad_norm": 1.437081217765808, "learning_rate": 2.8245310979377416e-05, "loss": 1.507, "step": 21280 }, { "epoch": 0.7621179293426683, "grad_norm": 1.5275484323501587, "learning_rate": 2.8237232631245624e-05, "loss": 1.5571, "step": 21281 }, { "epoch": 0.7621537414722367, "grad_norm": 2.310119152069092, "learning_rate": 2.8229155248589345e-05, "loss": 1.687, "step": 21282 }, { "epoch": 0.762189553601805, "grad_norm": 1.3541539907455444, "learning_rate": 2.822107883151719e-05, "loss": 1.5511, "step": 21283 }, { "epoch": 0.7622253657313732, "grad_norm": 1.8010655641555786, "learning_rate": 2.8213003380137783e-05, "loss": 1.6483, "step": 21284 }, { "epoch": 0.7622611778609415, "grad_norm": 1.4328798055648804, "learning_rate": 2.820492889455987e-05, "loss": 1.486, "step": 21285 }, { "epoch": 0.7622969899905098, "grad_norm": 1.6243557929992676, "learning_rate": 2.8196855374892006e-05, "loss": 1.6467, "step": 21286 }, { "epoch": 0.762332802120078, "grad_norm": 1.6708301305770874, "learning_rate": 2.8188782821242855e-05, "loss": 1.6195, "step": 21287 }, { "epoch": 0.7623686142496463, "grad_norm": 1.5732182264328003, "learning_rate": 2.8180711233720947e-05, "loss": 1.3332, "step": 21288 }, { "epoch": 0.7624044263792147, "grad_norm": 1.813924789428711, "learning_rate": 2.8172640612434987e-05, "loss": 1.285, "step": 21289 }, { "epoch": 0.762440238508783, "grad_norm": 2.3234944343566895, "learning_rate": 2.8164570957493473e-05, "loss": 1.3432, "step": 21290 }, { "epoch": 0.7624760506383512, "grad_norm": 1.5255062580108643, "learning_rate": 2.8156502269004992e-05, "loss": 1.472, "step": 21291 }, { "epoch": 0.7625118627679195, "grad_norm": 2.036771535873413, "learning_rate": 2.814843454707813e-05, "loss": 1.3433, "step": 21292 }, { "epoch": 0.7625476748974878, "grad_norm": 2.0992271900177, "learning_rate": 2.8140367791821363e-05, "loss": 1.2408, "step": 21293 }, { "epoch": 0.762583487027056, "grad_norm": 1.3696377277374268, "learning_rate": 2.813230200334329e-05, "loss": 1.1414, "step": 21294 }, { "epoch": 0.7626192991566243, "grad_norm": 2.283036231994629, "learning_rate": 2.8124237181752334e-05, "loss": 1.5427, "step": 21295 }, { "epoch": 0.7626551112861927, "grad_norm": 1.7576613426208496, "learning_rate": 2.8116173327157114e-05, "loss": 1.338, "step": 21296 }, { "epoch": 0.762690923415761, "grad_norm": 1.6552729606628418, "learning_rate": 2.8108110439666024e-05, "loss": 1.3773, "step": 21297 }, { "epoch": 0.7627267355453292, "grad_norm": 1.3806850910186768, "learning_rate": 2.8100048519387613e-05, "loss": 1.3199, "step": 21298 }, { "epoch": 0.7627625476748975, "grad_norm": 1.8089537620544434, "learning_rate": 2.8091987566430233e-05, "loss": 1.2466, "step": 21299 }, { "epoch": 0.7627983598044658, "grad_norm": 1.5321838855743408, "learning_rate": 2.808392758090247e-05, "loss": 1.4227, "step": 21300 }, { "epoch": 0.762834171934034, "grad_norm": 1.6883888244628906, "learning_rate": 2.80758685629127e-05, "loss": 1.4317, "step": 21301 }, { "epoch": 0.7628699840636023, "grad_norm": 1.5351834297180176, "learning_rate": 2.8067810512569282e-05, "loss": 1.1779, "step": 21302 }, { "epoch": 0.7629057961931707, "grad_norm": 2.15999174118042, "learning_rate": 2.805975342998075e-05, "loss": 1.4464, "step": 21303 }, { "epoch": 0.7629416083227389, "grad_norm": 1.7024953365325928, "learning_rate": 2.80516973152554e-05, "loss": 1.3371, "step": 21304 }, { "epoch": 0.7629774204523072, "grad_norm": 1.595112919807434, "learning_rate": 2.8043642168501692e-05, "loss": 1.4669, "step": 21305 }, { "epoch": 0.7630132325818755, "grad_norm": 1.7060620784759521, "learning_rate": 2.8035587989827904e-05, "loss": 1.2248, "step": 21306 }, { "epoch": 0.7630490447114437, "grad_norm": 1.508658766746521, "learning_rate": 2.802753477934251e-05, "loss": 1.4452, "step": 21307 }, { "epoch": 0.763084856841012, "grad_norm": 3.327977180480957, "learning_rate": 2.8019482537153762e-05, "loss": 1.7054, "step": 21308 }, { "epoch": 0.7631206689705803, "grad_norm": 1.5800703763961792, "learning_rate": 2.801143126337007e-05, "loss": 1.508, "step": 21309 }, { "epoch": 0.7631564811001487, "grad_norm": 1.645244836807251, "learning_rate": 2.8003380958099677e-05, "loss": 1.5809, "step": 21310 }, { "epoch": 0.7631922932297169, "grad_norm": 1.597456455230713, "learning_rate": 2.7995331621450917e-05, "loss": 1.2949, "step": 21311 }, { "epoch": 0.7632281053592852, "grad_norm": 1.4710382223129272, "learning_rate": 2.7987283253532125e-05, "loss": 1.1875, "step": 21312 }, { "epoch": 0.7632639174888535, "grad_norm": 2.0475046634674072, "learning_rate": 2.7979235854451523e-05, "loss": 1.7837, "step": 21313 }, { "epoch": 0.7632997296184217, "grad_norm": 1.670210599899292, "learning_rate": 2.79711894243174e-05, "loss": 1.6562, "step": 21314 }, { "epoch": 0.76333554174799, "grad_norm": 1.4853568077087402, "learning_rate": 2.7963143963238005e-05, "loss": 1.443, "step": 21315 }, { "epoch": 0.7633713538775583, "grad_norm": 1.7376928329467773, "learning_rate": 2.795509947132162e-05, "loss": 1.2882, "step": 21316 }, { "epoch": 0.7634071660071267, "grad_norm": 1.8859137296676636, "learning_rate": 2.7947055948676392e-05, "loss": 1.7188, "step": 21317 }, { "epoch": 0.7634429781366949, "grad_norm": 1.6725388765335083, "learning_rate": 2.793901339541063e-05, "loss": 1.4696, "step": 21318 }, { "epoch": 0.7634787902662632, "grad_norm": 1.5267573595046997, "learning_rate": 2.79309718116325e-05, "loss": 1.3887, "step": 21319 }, { "epoch": 0.7635146023958315, "grad_norm": 2.08896541595459, "learning_rate": 2.792293119745014e-05, "loss": 1.4965, "step": 21320 }, { "epoch": 0.7635504145253997, "grad_norm": 1.5745618343353271, "learning_rate": 2.7914891552971776e-05, "loss": 1.5053, "step": 21321 }, { "epoch": 0.763586226654968, "grad_norm": 1.7663930654525757, "learning_rate": 2.7906852878305567e-05, "loss": 1.431, "step": 21322 }, { "epoch": 0.7636220387845363, "grad_norm": 1.428335428237915, "learning_rate": 2.789881517355969e-05, "loss": 1.6087, "step": 21323 }, { "epoch": 0.7636578509141047, "grad_norm": 1.546515703201294, "learning_rate": 2.7890778438842214e-05, "loss": 1.1567, "step": 21324 }, { "epoch": 0.7636936630436729, "grad_norm": 2.3970510959625244, "learning_rate": 2.7882742674261307e-05, "loss": 1.8865, "step": 21325 }, { "epoch": 0.7637294751732412, "grad_norm": 1.486477255821228, "learning_rate": 2.78747078799251e-05, "loss": 1.5473, "step": 21326 }, { "epoch": 0.7637652873028095, "grad_norm": 1.832295298576355, "learning_rate": 2.786667405594163e-05, "loss": 1.3825, "step": 21327 }, { "epoch": 0.7638010994323777, "grad_norm": 1.7424432039260864, "learning_rate": 2.785864120241901e-05, "loss": 1.503, "step": 21328 }, { "epoch": 0.763836911561946, "grad_norm": 1.831028938293457, "learning_rate": 2.7850609319465325e-05, "loss": 1.5769, "step": 21329 }, { "epoch": 0.7638727236915143, "grad_norm": 1.4432202577590942, "learning_rate": 2.7842578407188656e-05, "loss": 1.3591, "step": 21330 }, { "epoch": 0.7639085358210826, "grad_norm": 1.3321956396102905, "learning_rate": 2.7834548465696987e-05, "loss": 1.4144, "step": 21331 }, { "epoch": 0.7639443479506509, "grad_norm": 1.7596930265426636, "learning_rate": 2.7826519495098378e-05, "loss": 1.4145, "step": 21332 }, { "epoch": 0.7639801600802192, "grad_norm": 1.5014865398406982, "learning_rate": 2.7818491495500864e-05, "loss": 1.4459, "step": 21333 }, { "epoch": 0.7640159722097875, "grad_norm": 2.288329601287842, "learning_rate": 2.7810464467012455e-05, "loss": 1.7881, "step": 21334 }, { "epoch": 0.7640517843393557, "grad_norm": 1.4313377141952515, "learning_rate": 2.7802438409741106e-05, "loss": 1.5355, "step": 21335 }, { "epoch": 0.764087596468924, "grad_norm": 1.7000243663787842, "learning_rate": 2.7794413323794822e-05, "loss": 1.3548, "step": 21336 }, { "epoch": 0.7641234085984923, "grad_norm": 1.5719764232635498, "learning_rate": 2.7786389209281592e-05, "loss": 1.3211, "step": 21337 }, { "epoch": 0.7641592207280606, "grad_norm": 1.5126636028289795, "learning_rate": 2.7778366066309326e-05, "loss": 1.3154, "step": 21338 }, { "epoch": 0.7641950328576289, "grad_norm": 1.7162196636199951, "learning_rate": 2.7770343894985974e-05, "loss": 1.4414, "step": 21339 }, { "epoch": 0.7642308449871972, "grad_norm": 1.7152540683746338, "learning_rate": 2.7762322695419485e-05, "loss": 1.9311, "step": 21340 }, { "epoch": 0.7642666571167654, "grad_norm": 2.3539340496063232, "learning_rate": 2.7754302467717785e-05, "loss": 1.6651, "step": 21341 }, { "epoch": 0.7643024692463337, "grad_norm": 1.1298686265945435, "learning_rate": 2.7746283211988734e-05, "loss": 1.4936, "step": 21342 }, { "epoch": 0.764338281375902, "grad_norm": 1.6960740089416504, "learning_rate": 2.773826492834023e-05, "loss": 1.3619, "step": 21343 }, { "epoch": 0.7643740935054703, "grad_norm": 1.8697292804718018, "learning_rate": 2.77302476168802e-05, "loss": 1.4366, "step": 21344 }, { "epoch": 0.7644099056350386, "grad_norm": 1.8246264457702637, "learning_rate": 2.7722231277716437e-05, "loss": 1.3414, "step": 21345 }, { "epoch": 0.7644457177646069, "grad_norm": 2.1910572052001953, "learning_rate": 2.771421591095682e-05, "loss": 1.8065, "step": 21346 }, { "epoch": 0.7644815298941752, "grad_norm": 1.7753005027770996, "learning_rate": 2.7706201516709175e-05, "loss": 1.0744, "step": 21347 }, { "epoch": 0.7645173420237434, "grad_norm": 1.61532723903656, "learning_rate": 2.769818809508138e-05, "loss": 1.5135, "step": 21348 }, { "epoch": 0.7645531541533117, "grad_norm": 1.4952168464660645, "learning_rate": 2.769017564618117e-05, "loss": 1.1487, "step": 21349 }, { "epoch": 0.76458896628288, "grad_norm": 1.5021734237670898, "learning_rate": 2.7682164170116365e-05, "loss": 1.6482, "step": 21350 }, { "epoch": 0.7646247784124482, "grad_norm": 1.436759114265442, "learning_rate": 2.767415366699476e-05, "loss": 1.1454, "step": 21351 }, { "epoch": 0.7646605905420166, "grad_norm": 1.3185434341430664, "learning_rate": 2.7666144136924166e-05, "loss": 1.3639, "step": 21352 }, { "epoch": 0.7646964026715849, "grad_norm": 1.667108416557312, "learning_rate": 2.7658135580012256e-05, "loss": 1.135, "step": 21353 }, { "epoch": 0.7647322148011532, "grad_norm": 1.9361541271209717, "learning_rate": 2.7650127996366826e-05, "loss": 1.2538, "step": 21354 }, { "epoch": 0.7647680269307214, "grad_norm": 1.9174108505249023, "learning_rate": 2.764212138609562e-05, "loss": 1.5039, "step": 21355 }, { "epoch": 0.7648038390602897, "grad_norm": 2.271050214767456, "learning_rate": 2.7634115749306312e-05, "loss": 1.4584, "step": 21356 }, { "epoch": 0.764839651189858, "grad_norm": 1.594559907913208, "learning_rate": 2.762611108610663e-05, "loss": 1.1316, "step": 21357 }, { "epoch": 0.7648754633194262, "grad_norm": 1.610710859298706, "learning_rate": 2.7618107396604263e-05, "loss": 1.5176, "step": 21358 }, { "epoch": 0.7649112754489946, "grad_norm": 1.653333306312561, "learning_rate": 2.7610104680906933e-05, "loss": 1.4896, "step": 21359 }, { "epoch": 0.7649470875785629, "grad_norm": 1.548635721206665, "learning_rate": 2.760210293912223e-05, "loss": 1.7664, "step": 21360 }, { "epoch": 0.7649828997081312, "grad_norm": 1.920652985572815, "learning_rate": 2.759410217135786e-05, "loss": 1.4373, "step": 21361 }, { "epoch": 0.7650187118376994, "grad_norm": 1.9298456907272339, "learning_rate": 2.7586102377721467e-05, "loss": 1.2179, "step": 21362 }, { "epoch": 0.7650545239672677, "grad_norm": 1.5186060667037964, "learning_rate": 2.7578103558320623e-05, "loss": 1.5287, "step": 21363 }, { "epoch": 0.765090336096836, "grad_norm": 1.91496741771698, "learning_rate": 2.7570105713262995e-05, "loss": 1.4081, "step": 21364 }, { "epoch": 0.7651261482264042, "grad_norm": 2.4558486938476562, "learning_rate": 2.7562108842656152e-05, "loss": 1.1442, "step": 21365 }, { "epoch": 0.7651619603559726, "grad_norm": 1.984779715538025, "learning_rate": 2.7554112946607735e-05, "loss": 1.5647, "step": 21366 }, { "epoch": 0.7651977724855409, "grad_norm": 1.4279773235321045, "learning_rate": 2.7546118025225244e-05, "loss": 1.4004, "step": 21367 }, { "epoch": 0.7652335846151092, "grad_norm": 1.703660011291504, "learning_rate": 2.7538124078616278e-05, "loss": 1.4426, "step": 21368 }, { "epoch": 0.7652693967446774, "grad_norm": 1.6676280498504639, "learning_rate": 2.753013110688839e-05, "loss": 1.3846, "step": 21369 }, { "epoch": 0.7653052088742457, "grad_norm": 1.7314229011535645, "learning_rate": 2.7522139110149125e-05, "loss": 1.7917, "step": 21370 }, { "epoch": 0.765341021003814, "grad_norm": 1.8637425899505615, "learning_rate": 2.7514148088505998e-05, "loss": 1.595, "step": 21371 }, { "epoch": 0.7653768331333822, "grad_norm": 2.5171310901641846, "learning_rate": 2.7506158042066454e-05, "loss": 1.4561, "step": 21372 }, { "epoch": 0.7654126452629506, "grad_norm": 1.657228708267212, "learning_rate": 2.74981689709381e-05, "loss": 1.5529, "step": 21373 }, { "epoch": 0.7654484573925189, "grad_norm": 1.326586365699768, "learning_rate": 2.749018087522832e-05, "loss": 1.0958, "step": 21374 }, { "epoch": 0.7654842695220871, "grad_norm": 1.817240595817566, "learning_rate": 2.7482193755044637e-05, "loss": 1.5557, "step": 21375 }, { "epoch": 0.7655200816516554, "grad_norm": 1.442419171333313, "learning_rate": 2.7474207610494495e-05, "loss": 1.2791, "step": 21376 }, { "epoch": 0.7655558937812237, "grad_norm": 1.3630636930465698, "learning_rate": 2.7466222441685362e-05, "loss": 1.1471, "step": 21377 }, { "epoch": 0.765591705910792, "grad_norm": 1.8548414707183838, "learning_rate": 2.7458238248724623e-05, "loss": 1.5049, "step": 21378 }, { "epoch": 0.7656275180403602, "grad_norm": 1.5222378969192505, "learning_rate": 2.7450255031719707e-05, "loss": 1.5707, "step": 21379 }, { "epoch": 0.7656633301699286, "grad_norm": 1.5617293119430542, "learning_rate": 2.7442272790778057e-05, "loss": 1.1615, "step": 21380 }, { "epoch": 0.7656991422994969, "grad_norm": 1.7830644845962524, "learning_rate": 2.7434291526007004e-05, "loss": 1.5264, "step": 21381 }, { "epoch": 0.7657349544290651, "grad_norm": 1.7911914587020874, "learning_rate": 2.742631123751399e-05, "loss": 1.3538, "step": 21382 }, { "epoch": 0.7657707665586334, "grad_norm": 1.432540774345398, "learning_rate": 2.7418331925406293e-05, "loss": 1.458, "step": 21383 }, { "epoch": 0.7658065786882017, "grad_norm": 2.0374929904937744, "learning_rate": 2.741035358979136e-05, "loss": 1.5482, "step": 21384 }, { "epoch": 0.76584239081777, "grad_norm": 1.5840460062026978, "learning_rate": 2.7402376230776473e-05, "loss": 1.3272, "step": 21385 }, { "epoch": 0.7658782029473382, "grad_norm": 1.9470570087432861, "learning_rate": 2.7394399848468953e-05, "loss": 1.4448, "step": 21386 }, { "epoch": 0.7659140150769066, "grad_norm": 1.5973190069198608, "learning_rate": 2.7386424442976132e-05, "loss": 1.3651, "step": 21387 }, { "epoch": 0.7659498272064749, "grad_norm": 2.248100996017456, "learning_rate": 2.7378450014405342e-05, "loss": 1.4034, "step": 21388 }, { "epoch": 0.7659856393360431, "grad_norm": 1.8361610174179077, "learning_rate": 2.7370476562863835e-05, "loss": 1.3061, "step": 21389 }, { "epoch": 0.7660214514656114, "grad_norm": 1.7707544565200806, "learning_rate": 2.7362504088458807e-05, "loss": 1.3279, "step": 21390 }, { "epoch": 0.7660572635951797, "grad_norm": 1.2263545989990234, "learning_rate": 2.7354532591297666e-05, "loss": 1.6046, "step": 21391 }, { "epoch": 0.7660930757247479, "grad_norm": 1.5667316913604736, "learning_rate": 2.7346562071487537e-05, "loss": 1.3985, "step": 21392 }, { "epoch": 0.7661288878543162, "grad_norm": 1.6585215330123901, "learning_rate": 2.7338592529135744e-05, "loss": 1.5769, "step": 21393 }, { "epoch": 0.7661646999838846, "grad_norm": 1.4081734418869019, "learning_rate": 2.7330623964349387e-05, "loss": 1.5353, "step": 21394 }, { "epoch": 0.7662005121134529, "grad_norm": 2.058746337890625, "learning_rate": 2.732265637723582e-05, "loss": 1.4592, "step": 21395 }, { "epoch": 0.7662363242430211, "grad_norm": 1.8329404592514038, "learning_rate": 2.7314689767902134e-05, "loss": 1.3414, "step": 21396 }, { "epoch": 0.7662721363725894, "grad_norm": 1.811803936958313, "learning_rate": 2.7306724136455564e-05, "loss": 1.3135, "step": 21397 }, { "epoch": 0.7663079485021577, "grad_norm": 1.5890330076217651, "learning_rate": 2.7298759483003223e-05, "loss": 1.0912, "step": 21398 }, { "epoch": 0.7663437606317259, "grad_norm": 1.5598218441009521, "learning_rate": 2.7290795807652305e-05, "loss": 1.7041, "step": 21399 }, { "epoch": 0.7663795727612942, "grad_norm": 1.9628334045410156, "learning_rate": 2.7282833110509952e-05, "loss": 1.6035, "step": 21400 }, { "epoch": 0.7664153848908626, "grad_norm": 1.3994742631912231, "learning_rate": 2.7274871391683243e-05, "loss": 1.0589, "step": 21401 }, { "epoch": 0.7664511970204309, "grad_norm": 1.670101284980774, "learning_rate": 2.7266910651279376e-05, "loss": 1.189, "step": 21402 }, { "epoch": 0.7664870091499991, "grad_norm": 1.4442775249481201, "learning_rate": 2.725895088940539e-05, "loss": 1.3845, "step": 21403 }, { "epoch": 0.7665228212795674, "grad_norm": 1.8506652116775513, "learning_rate": 2.7250992106168406e-05, "loss": 1.6786, "step": 21404 }, { "epoch": 0.7665586334091357, "grad_norm": 1.296434760093689, "learning_rate": 2.724303430167543e-05, "loss": 1.384, "step": 21405 }, { "epoch": 0.7665944455387039, "grad_norm": 1.8313742876052856, "learning_rate": 2.7235077476033645e-05, "loss": 1.4286, "step": 21406 }, { "epoch": 0.7666302576682722, "grad_norm": 1.7008599042892456, "learning_rate": 2.7227121629350016e-05, "loss": 1.5361, "step": 21407 }, { "epoch": 0.7666660697978406, "grad_norm": 1.674019455909729, "learning_rate": 2.7219166761731585e-05, "loss": 1.6992, "step": 21408 }, { "epoch": 0.7667018819274088, "grad_norm": 1.7322226762771606, "learning_rate": 2.7211212873285376e-05, "loss": 1.4695, "step": 21409 }, { "epoch": 0.7667376940569771, "grad_norm": 1.8530694246292114, "learning_rate": 2.72032599641184e-05, "loss": 1.3144, "step": 21410 }, { "epoch": 0.7667735061865454, "grad_norm": 1.8700464963912964, "learning_rate": 2.7195308034337698e-05, "loss": 1.2808, "step": 21411 }, { "epoch": 0.7668093183161137, "grad_norm": 1.8522310256958008, "learning_rate": 2.7187357084050147e-05, "loss": 1.2198, "step": 21412 }, { "epoch": 0.7668451304456819, "grad_norm": 2.2776641845703125, "learning_rate": 2.7179407113362853e-05, "loss": 1.6649, "step": 21413 }, { "epoch": 0.7668809425752502, "grad_norm": 2.573241949081421, "learning_rate": 2.7171458122382675e-05, "loss": 1.4936, "step": 21414 }, { "epoch": 0.7669167547048186, "grad_norm": 1.872636318206787, "learning_rate": 2.7163510111216618e-05, "loss": 1.4336, "step": 21415 }, { "epoch": 0.7669525668343868, "grad_norm": 2.0823798179626465, "learning_rate": 2.7155563079971535e-05, "loss": 1.5788, "step": 21416 }, { "epoch": 0.7669883789639551, "grad_norm": 1.9190607070922852, "learning_rate": 2.71476170287544e-05, "loss": 1.3688, "step": 21417 }, { "epoch": 0.7670241910935234, "grad_norm": 1.5269092321395874, "learning_rate": 2.713967195767214e-05, "loss": 1.4576, "step": 21418 }, { "epoch": 0.7670600032230916, "grad_norm": 1.5405995845794678, "learning_rate": 2.713172786683157e-05, "loss": 1.3906, "step": 21419 }, { "epoch": 0.7670958153526599, "grad_norm": 1.7105742692947388, "learning_rate": 2.712378475633961e-05, "loss": 1.6096, "step": 21420 }, { "epoch": 0.7671316274822282, "grad_norm": 1.8407995700836182, "learning_rate": 2.7115842626303134e-05, "loss": 1.4489, "step": 21421 }, { "epoch": 0.7671674396117966, "grad_norm": 1.9620721340179443, "learning_rate": 2.7107901476829e-05, "loss": 1.6845, "step": 21422 }, { "epoch": 0.7672032517413648, "grad_norm": 2.0660271644592285, "learning_rate": 2.7099961308024004e-05, "loss": 1.1152, "step": 21423 }, { "epoch": 0.7672390638709331, "grad_norm": 1.7892515659332275, "learning_rate": 2.7092022119994988e-05, "loss": 1.3035, "step": 21424 }, { "epoch": 0.7672748760005014, "grad_norm": 1.1042615175247192, "learning_rate": 2.70840839128488e-05, "loss": 1.2817, "step": 21425 }, { "epoch": 0.7673106881300696, "grad_norm": 1.3992247581481934, "learning_rate": 2.7076146686692184e-05, "loss": 1.4926, "step": 21426 }, { "epoch": 0.7673465002596379, "grad_norm": 2.207401990890503, "learning_rate": 2.7068210441631947e-05, "loss": 1.4668, "step": 21427 }, { "epoch": 0.7673823123892062, "grad_norm": 1.5987458229064941, "learning_rate": 2.7060275177774862e-05, "loss": 1.3982, "step": 21428 }, { "epoch": 0.7674181245187746, "grad_norm": 1.4975227117538452, "learning_rate": 2.7052340895227714e-05, "loss": 1.7043, "step": 21429 }, { "epoch": 0.7674539366483428, "grad_norm": 1.8431814908981323, "learning_rate": 2.7044407594097197e-05, "loss": 1.618, "step": 21430 }, { "epoch": 0.7674897487779111, "grad_norm": 1.6562138795852661, "learning_rate": 2.703647527449007e-05, "loss": 1.2938, "step": 21431 }, { "epoch": 0.7675255609074794, "grad_norm": 1.7366337776184082, "learning_rate": 2.7028543936513086e-05, "loss": 1.255, "step": 21432 }, { "epoch": 0.7675613730370476, "grad_norm": 1.6569148302078247, "learning_rate": 2.7020613580272893e-05, "loss": 1.4548, "step": 21433 }, { "epoch": 0.7675971851666159, "grad_norm": 1.7070738077163696, "learning_rate": 2.7012684205876192e-05, "loss": 1.226, "step": 21434 }, { "epoch": 0.7676329972961842, "grad_norm": 1.5093258619308472, "learning_rate": 2.7004755813429683e-05, "loss": 1.4913, "step": 21435 }, { "epoch": 0.7676688094257526, "grad_norm": 1.564212441444397, "learning_rate": 2.6996828403040064e-05, "loss": 1.5233, "step": 21436 }, { "epoch": 0.7677046215553208, "grad_norm": 1.472865343093872, "learning_rate": 2.698890197481392e-05, "loss": 1.4532, "step": 21437 }, { "epoch": 0.7677404336848891, "grad_norm": 2.028477430343628, "learning_rate": 2.6980976528857915e-05, "loss": 1.3926, "step": 21438 }, { "epoch": 0.7677762458144574, "grad_norm": 1.4569189548492432, "learning_rate": 2.697305206527869e-05, "loss": 1.2849, "step": 21439 }, { "epoch": 0.7678120579440256, "grad_norm": 1.7602864503860474, "learning_rate": 2.6965128584182886e-05, "loss": 1.3607, "step": 21440 }, { "epoch": 0.7678478700735939, "grad_norm": 1.7039145231246948, "learning_rate": 2.6957206085677023e-05, "loss": 1.6376, "step": 21441 }, { "epoch": 0.7678836822031622, "grad_norm": 2.3543314933776855, "learning_rate": 2.694928456986775e-05, "loss": 1.5835, "step": 21442 }, { "epoch": 0.7679194943327305, "grad_norm": 1.356101393699646, "learning_rate": 2.6941364036861638e-05, "loss": 1.0908, "step": 21443 }, { "epoch": 0.7679553064622988, "grad_norm": 1.6495617628097534, "learning_rate": 2.6933444486765212e-05, "loss": 1.3593, "step": 21444 }, { "epoch": 0.7679911185918671, "grad_norm": 1.7570487260818481, "learning_rate": 2.6925525919685047e-05, "loss": 1.6665, "step": 21445 }, { "epoch": 0.7680269307214354, "grad_norm": 1.7626287937164307, "learning_rate": 2.6917608335727675e-05, "loss": 1.5255, "step": 21446 }, { "epoch": 0.7680627428510036, "grad_norm": 1.4852346181869507, "learning_rate": 2.6909691734999633e-05, "loss": 1.3978, "step": 21447 }, { "epoch": 0.7680985549805719, "grad_norm": 1.377875566482544, "learning_rate": 2.690177611760738e-05, "loss": 1.4612, "step": 21448 }, { "epoch": 0.7681343671101402, "grad_norm": 1.5788637399673462, "learning_rate": 2.6893861483657436e-05, "loss": 1.529, "step": 21449 }, { "epoch": 0.7681701792397085, "grad_norm": 1.2312066555023193, "learning_rate": 2.688594783325632e-05, "loss": 1.4575, "step": 21450 }, { "epoch": 0.7682059913692768, "grad_norm": 2.2032153606414795, "learning_rate": 2.687803516651044e-05, "loss": 1.4401, "step": 21451 }, { "epoch": 0.7682418034988451, "grad_norm": 1.4819837808609009, "learning_rate": 2.6870123483526276e-05, "loss": 1.329, "step": 21452 }, { "epoch": 0.7682776156284133, "grad_norm": 1.5559954643249512, "learning_rate": 2.6862212784410258e-05, "loss": 1.3719, "step": 21453 }, { "epoch": 0.7683134277579816, "grad_norm": 1.2472002506256104, "learning_rate": 2.685430306926887e-05, "loss": 1.3489, "step": 21454 }, { "epoch": 0.7683492398875499, "grad_norm": 1.4845582246780396, "learning_rate": 2.6846394338208446e-05, "loss": 1.4089, "step": 21455 }, { "epoch": 0.7683850520171182, "grad_norm": 2.007016181945801, "learning_rate": 2.683848659133542e-05, "loss": 1.4288, "step": 21456 }, { "epoch": 0.7684208641466865, "grad_norm": 1.433982253074646, "learning_rate": 2.68305798287562e-05, "loss": 1.3565, "step": 21457 }, { "epoch": 0.7684566762762548, "grad_norm": 1.5630964040756226, "learning_rate": 2.682267405057717e-05, "loss": 1.4758, "step": 21458 }, { "epoch": 0.7684924884058231, "grad_norm": 2.2693796157836914, "learning_rate": 2.6814769256904627e-05, "loss": 1.3924, "step": 21459 }, { "epoch": 0.7685283005353913, "grad_norm": 1.6751092672348022, "learning_rate": 2.6806865447844974e-05, "loss": 1.4485, "step": 21460 }, { "epoch": 0.7685641126649596, "grad_norm": 1.8391053676605225, "learning_rate": 2.6798962623504566e-05, "loss": 1.6818, "step": 21461 }, { "epoch": 0.7685999247945279, "grad_norm": 1.5685383081436157, "learning_rate": 2.6791060783989653e-05, "loss": 1.2592, "step": 21462 }, { "epoch": 0.7686357369240961, "grad_norm": 1.350655436515808, "learning_rate": 2.678315992940659e-05, "loss": 1.3298, "step": 21463 }, { "epoch": 0.7686715490536645, "grad_norm": 1.6182117462158203, "learning_rate": 2.6775260059861673e-05, "loss": 1.4552, "step": 21464 }, { "epoch": 0.7687073611832328, "grad_norm": 1.374692440032959, "learning_rate": 2.6767361175461202e-05, "loss": 1.2673, "step": 21465 }, { "epoch": 0.7687431733128011, "grad_norm": 1.6027882099151611, "learning_rate": 2.6759463276311393e-05, "loss": 1.2951, "step": 21466 }, { "epoch": 0.7687789854423693, "grad_norm": 1.84087336063385, "learning_rate": 2.675156636251853e-05, "loss": 1.3431, "step": 21467 }, { "epoch": 0.7688147975719376, "grad_norm": 2.3304686546325684, "learning_rate": 2.6743670434188893e-05, "loss": 1.6684, "step": 21468 }, { "epoch": 0.7688506097015059, "grad_norm": 2.1128644943237305, "learning_rate": 2.673577549142864e-05, "loss": 1.5036, "step": 21469 }, { "epoch": 0.7688864218310741, "grad_norm": 1.8388144969940186, "learning_rate": 2.6727881534344057e-05, "loss": 1.6742, "step": 21470 }, { "epoch": 0.7689222339606425, "grad_norm": 1.973982810974121, "learning_rate": 2.6719988563041264e-05, "loss": 1.4739, "step": 21471 }, { "epoch": 0.7689580460902108, "grad_norm": 1.7554025650024414, "learning_rate": 2.6712096577626543e-05, "loss": 1.4875, "step": 21472 }, { "epoch": 0.7689938582197791, "grad_norm": 1.8182271718978882, "learning_rate": 2.670420557820601e-05, "loss": 1.8165, "step": 21473 }, { "epoch": 0.7690296703493473, "grad_norm": 1.4545783996582031, "learning_rate": 2.6696315564885844e-05, "loss": 1.5724, "step": 21474 }, { "epoch": 0.7690654824789156, "grad_norm": 1.3026014566421509, "learning_rate": 2.6688426537772194e-05, "loss": 1.5714, "step": 21475 }, { "epoch": 0.7691012946084839, "grad_norm": 1.7563316822052002, "learning_rate": 2.668053849697123e-05, "loss": 1.3821, "step": 21476 }, { "epoch": 0.7691371067380521, "grad_norm": 1.6058958768844604, "learning_rate": 2.6672651442589046e-05, "loss": 1.4226, "step": 21477 }, { "epoch": 0.7691729188676205, "grad_norm": 1.446184754371643, "learning_rate": 2.6664765374731693e-05, "loss": 1.1693, "step": 21478 }, { "epoch": 0.7692087309971888, "grad_norm": 1.7952556610107422, "learning_rate": 2.665688029350538e-05, "loss": 1.5924, "step": 21479 }, { "epoch": 0.769244543126757, "grad_norm": 1.4149837493896484, "learning_rate": 2.6648996199016118e-05, "loss": 1.5261, "step": 21480 }, { "epoch": 0.7692803552563253, "grad_norm": 2.5518479347229004, "learning_rate": 2.6641113091370017e-05, "loss": 1.4939, "step": 21481 }, { "epoch": 0.7693161673858936, "grad_norm": 2.9114930629730225, "learning_rate": 2.6633230970673062e-05, "loss": 1.8433, "step": 21482 }, { "epoch": 0.7693519795154619, "grad_norm": 1.5813169479370117, "learning_rate": 2.66253498370314e-05, "loss": 1.3981, "step": 21483 }, { "epoch": 0.7693877916450301, "grad_norm": 1.501997947692871, "learning_rate": 2.661746969055098e-05, "loss": 1.425, "step": 21484 }, { "epoch": 0.7694236037745985, "grad_norm": 1.4923934936523438, "learning_rate": 2.660959053133786e-05, "loss": 1.4173, "step": 21485 }, { "epoch": 0.7694594159041668, "grad_norm": 1.368891954421997, "learning_rate": 2.6601712359498045e-05, "loss": 1.5218, "step": 21486 }, { "epoch": 0.769495228033735, "grad_norm": 1.6902717351913452, "learning_rate": 2.6593835175137494e-05, "loss": 1.2863, "step": 21487 }, { "epoch": 0.7695310401633033, "grad_norm": 1.7259552478790283, "learning_rate": 2.6585958978362235e-05, "loss": 1.4637, "step": 21488 }, { "epoch": 0.7695668522928716, "grad_norm": 1.6730120182037354, "learning_rate": 2.6578083769278127e-05, "loss": 1.3071, "step": 21489 }, { "epoch": 0.7696026644224399, "grad_norm": 2.149928331375122, "learning_rate": 2.6570209547991265e-05, "loss": 1.5697, "step": 21490 }, { "epoch": 0.7696384765520081, "grad_norm": 2.8818540573120117, "learning_rate": 2.6562336314607484e-05, "loss": 1.3422, "step": 21491 }, { "epoch": 0.7696742886815765, "grad_norm": 1.4298157691955566, "learning_rate": 2.6554464069232776e-05, "loss": 1.4364, "step": 21492 }, { "epoch": 0.7697101008111448, "grad_norm": 1.7484629154205322, "learning_rate": 2.6546592811972948e-05, "loss": 1.5057, "step": 21493 }, { "epoch": 0.769745912940713, "grad_norm": 2.24288272857666, "learning_rate": 2.6538722542934035e-05, "loss": 1.6272, "step": 21494 }, { "epoch": 0.7697817250702813, "grad_norm": 1.6153521537780762, "learning_rate": 2.6530853262221843e-05, "loss": 1.3215, "step": 21495 }, { "epoch": 0.7698175371998496, "grad_norm": 1.5541167259216309, "learning_rate": 2.652298496994222e-05, "loss": 1.3765, "step": 21496 }, { "epoch": 0.7698533493294178, "grad_norm": 2.0340280532836914, "learning_rate": 2.6515117666201062e-05, "loss": 1.6024, "step": 21497 }, { "epoch": 0.7698891614589861, "grad_norm": 1.580460786819458, "learning_rate": 2.6507251351104212e-05, "loss": 1.1933, "step": 21498 }, { "epoch": 0.7699249735885545, "grad_norm": 1.4655922651290894, "learning_rate": 2.649938602475751e-05, "loss": 0.9808, "step": 21499 }, { "epoch": 0.7699607857181228, "grad_norm": 1.751434326171875, "learning_rate": 2.6491521687266717e-05, "loss": 1.4647, "step": 21500 }, { "epoch": 0.769996597847691, "grad_norm": 1.7684653997421265, "learning_rate": 2.6483658338737726e-05, "loss": 1.6803, "step": 21501 }, { "epoch": 0.7700324099772593, "grad_norm": 1.5846799612045288, "learning_rate": 2.6475795979276262e-05, "loss": 1.5269, "step": 21502 }, { "epoch": 0.7700682221068276, "grad_norm": 1.7030701637268066, "learning_rate": 2.6467934608988155e-05, "loss": 1.4744, "step": 21503 }, { "epoch": 0.7701040342363958, "grad_norm": 2.3186452388763428, "learning_rate": 2.6460074227979104e-05, "loss": 1.5314, "step": 21504 }, { "epoch": 0.7701398463659641, "grad_norm": 1.4595292806625366, "learning_rate": 2.6452214836354893e-05, "loss": 1.7642, "step": 21505 }, { "epoch": 0.7701756584955325, "grad_norm": 1.770524501800537, "learning_rate": 2.6444356434221296e-05, "loss": 1.5098, "step": 21506 }, { "epoch": 0.7702114706251008, "grad_norm": 3.266895055770874, "learning_rate": 2.643649902168397e-05, "loss": 1.4408, "step": 21507 }, { "epoch": 0.770247282754669, "grad_norm": 1.3348801136016846, "learning_rate": 2.6428642598848663e-05, "loss": 1.5114, "step": 21508 }, { "epoch": 0.7702830948842373, "grad_norm": 1.5498926639556885, "learning_rate": 2.642078716582107e-05, "loss": 1.3847, "step": 21509 }, { "epoch": 0.7703189070138056, "grad_norm": 1.818127155303955, "learning_rate": 2.6412932722706908e-05, "loss": 1.3761, "step": 21510 }, { "epoch": 0.7703547191433738, "grad_norm": 1.73329758644104, "learning_rate": 2.6405079269611744e-05, "loss": 1.3512, "step": 21511 }, { "epoch": 0.7703905312729421, "grad_norm": 1.2978670597076416, "learning_rate": 2.6397226806641375e-05, "loss": 1.4232, "step": 21512 }, { "epoch": 0.7704263434025105, "grad_norm": 1.5033830404281616, "learning_rate": 2.6389375333901377e-05, "loss": 1.3919, "step": 21513 }, { "epoch": 0.7704621555320788, "grad_norm": 1.4706625938415527, "learning_rate": 2.6381524851497353e-05, "loss": 1.4614, "step": 21514 }, { "epoch": 0.770497967661647, "grad_norm": 1.6710084676742554, "learning_rate": 2.6373675359534955e-05, "loss": 1.549, "step": 21515 }, { "epoch": 0.7705337797912153, "grad_norm": 1.2906526327133179, "learning_rate": 2.636582685811978e-05, "loss": 1.4191, "step": 21516 }, { "epoch": 0.7705695919207836, "grad_norm": 1.3973788022994995, "learning_rate": 2.6357979347357454e-05, "loss": 1.2873, "step": 21517 }, { "epoch": 0.7706054040503518, "grad_norm": 1.4239815473556519, "learning_rate": 2.635013282735349e-05, "loss": 1.5933, "step": 21518 }, { "epoch": 0.7706412161799201, "grad_norm": 2.066704273223877, "learning_rate": 2.63422872982135e-05, "loss": 1.3148, "step": 21519 }, { "epoch": 0.7706770283094885, "grad_norm": 1.2484495639801025, "learning_rate": 2.6334442760043044e-05, "loss": 1.3429, "step": 21520 }, { "epoch": 0.7707128404390567, "grad_norm": 1.6173884868621826, "learning_rate": 2.632659921294761e-05, "loss": 1.4515, "step": 21521 }, { "epoch": 0.770748652568625, "grad_norm": 1.4764695167541504, "learning_rate": 2.631875665703275e-05, "loss": 1.4135, "step": 21522 }, { "epoch": 0.7707844646981933, "grad_norm": 1.487092137336731, "learning_rate": 2.6310915092403976e-05, "loss": 1.2509, "step": 21523 }, { "epoch": 0.7708202768277616, "grad_norm": 1.7480628490447998, "learning_rate": 2.6303074519166827e-05, "loss": 1.6969, "step": 21524 }, { "epoch": 0.7708560889573298, "grad_norm": 1.725825309753418, "learning_rate": 2.6295234937426706e-05, "loss": 1.3108, "step": 21525 }, { "epoch": 0.7708919010868981, "grad_norm": 1.542431116104126, "learning_rate": 2.628739634728914e-05, "loss": 1.0812, "step": 21526 }, { "epoch": 0.7709277132164665, "grad_norm": 1.9049519300460815, "learning_rate": 2.6279558748859555e-05, "loss": 1.659, "step": 21527 }, { "epoch": 0.7709635253460347, "grad_norm": 1.3463095426559448, "learning_rate": 2.627172214224346e-05, "loss": 1.5486, "step": 21528 }, { "epoch": 0.770999337475603, "grad_norm": 2.2055890560150146, "learning_rate": 2.626388652754621e-05, "loss": 1.6108, "step": 21529 }, { "epoch": 0.7710351496051713, "grad_norm": 1.8153146505355835, "learning_rate": 2.6256051904873246e-05, "loss": 1.1265, "step": 21530 }, { "epoch": 0.7710709617347395, "grad_norm": 2.994877338409424, "learning_rate": 2.6248218274330017e-05, "loss": 1.6599, "step": 21531 }, { "epoch": 0.7711067738643078, "grad_norm": 1.478348731994629, "learning_rate": 2.6240385636021847e-05, "loss": 1.4522, "step": 21532 }, { "epoch": 0.7711425859938761, "grad_norm": 1.8544647693634033, "learning_rate": 2.6232553990054144e-05, "loss": 1.4124, "step": 21533 }, { "epoch": 0.7711783981234445, "grad_norm": 1.606522798538208, "learning_rate": 2.6224723336532274e-05, "loss": 1.3303, "step": 21534 }, { "epoch": 0.7712142102530127, "grad_norm": 1.5226876735687256, "learning_rate": 2.6216893675561617e-05, "loss": 1.5826, "step": 21535 }, { "epoch": 0.771250022382581, "grad_norm": 1.7415231466293335, "learning_rate": 2.6209065007247458e-05, "loss": 1.3703, "step": 21536 }, { "epoch": 0.7712858345121493, "grad_norm": 1.5941494703292847, "learning_rate": 2.6201237331695138e-05, "loss": 1.5155, "step": 21537 }, { "epoch": 0.7713216466417175, "grad_norm": 1.5538673400878906, "learning_rate": 2.619341064901001e-05, "loss": 1.2863, "step": 21538 }, { "epoch": 0.7713574587712858, "grad_norm": 1.6205617189407349, "learning_rate": 2.6185584959297303e-05, "loss": 1.4426, "step": 21539 }, { "epoch": 0.7713932709008541, "grad_norm": 1.6439517736434937, "learning_rate": 2.6177760262662345e-05, "loss": 1.3722, "step": 21540 }, { "epoch": 0.7714290830304225, "grad_norm": 2.5068721771240234, "learning_rate": 2.6169936559210396e-05, "loss": 1.5905, "step": 21541 }, { "epoch": 0.7714648951599907, "grad_norm": 1.3637676239013672, "learning_rate": 2.6162113849046745e-05, "loss": 1.5606, "step": 21542 }, { "epoch": 0.771500707289559, "grad_norm": 1.5745965242385864, "learning_rate": 2.615429213227658e-05, "loss": 1.7289, "step": 21543 }, { "epoch": 0.7715365194191273, "grad_norm": 1.5057892799377441, "learning_rate": 2.6146471409005158e-05, "loss": 1.6678, "step": 21544 }, { "epoch": 0.7715723315486955, "grad_norm": 1.9322911500930786, "learning_rate": 2.61386516793377e-05, "loss": 1.5913, "step": 21545 }, { "epoch": 0.7716081436782638, "grad_norm": 1.3579450845718384, "learning_rate": 2.6130832943379447e-05, "loss": 1.5061, "step": 21546 }, { "epoch": 0.7716439558078321, "grad_norm": 1.32883620262146, "learning_rate": 2.612301520123551e-05, "loss": 1.0265, "step": 21547 }, { "epoch": 0.7716797679374005, "grad_norm": 1.9010030031204224, "learning_rate": 2.6115198453011114e-05, "loss": 1.4166, "step": 21548 }, { "epoch": 0.7717155800669687, "grad_norm": 1.6484206914901733, "learning_rate": 2.6107382698811446e-05, "loss": 1.4505, "step": 21549 }, { "epoch": 0.771751392196537, "grad_norm": 1.9972126483917236, "learning_rate": 2.609956793874161e-05, "loss": 1.6538, "step": 21550 }, { "epoch": 0.7717872043261053, "grad_norm": 2.0636024475097656, "learning_rate": 2.6091754172906747e-05, "loss": 1.4478, "step": 21551 }, { "epoch": 0.7718230164556735, "grad_norm": 1.371565341949463, "learning_rate": 2.6083941401412005e-05, "loss": 1.4717, "step": 21552 }, { "epoch": 0.7718588285852418, "grad_norm": 1.405341386795044, "learning_rate": 2.6076129624362512e-05, "loss": 1.3875, "step": 21553 }, { "epoch": 0.7718946407148101, "grad_norm": 1.5658798217773438, "learning_rate": 2.6068318841863314e-05, "loss": 1.1604, "step": 21554 }, { "epoch": 0.7719304528443784, "grad_norm": 1.636956810951233, "learning_rate": 2.6060509054019523e-05, "loss": 1.1873, "step": 21555 }, { "epoch": 0.7719662649739467, "grad_norm": 1.5121686458587646, "learning_rate": 2.6052700260936237e-05, "loss": 1.1696, "step": 21556 }, { "epoch": 0.772002077103515, "grad_norm": 1.6735605001449585, "learning_rate": 2.604489246271845e-05, "loss": 1.6659, "step": 21557 }, { "epoch": 0.7720378892330833, "grad_norm": 1.52809739112854, "learning_rate": 2.6037085659471237e-05, "loss": 1.4882, "step": 21558 }, { "epoch": 0.7720737013626515, "grad_norm": 1.5789731740951538, "learning_rate": 2.6029279851299636e-05, "loss": 1.2542, "step": 21559 }, { "epoch": 0.7721095134922198, "grad_norm": 1.3522050380706787, "learning_rate": 2.6021475038308694e-05, "loss": 1.6823, "step": 21560 }, { "epoch": 0.7721453256217881, "grad_norm": 1.3396024703979492, "learning_rate": 2.6013671220603343e-05, "loss": 1.5947, "step": 21561 }, { "epoch": 0.7721811377513564, "grad_norm": 1.3040772676467896, "learning_rate": 2.6005868398288614e-05, "loss": 1.1411, "step": 21562 }, { "epoch": 0.7722169498809247, "grad_norm": 2.3642704486846924, "learning_rate": 2.5998066571469482e-05, "loss": 1.5471, "step": 21563 }, { "epoch": 0.772252762010493, "grad_norm": 2.173736333847046, "learning_rate": 2.599026574025093e-05, "loss": 1.4395, "step": 21564 }, { "epoch": 0.7722885741400612, "grad_norm": 1.3441381454467773, "learning_rate": 2.5982465904737895e-05, "loss": 1.3327, "step": 21565 }, { "epoch": 0.7723243862696295, "grad_norm": 1.4685009717941284, "learning_rate": 2.597466706503524e-05, "loss": 1.6688, "step": 21566 }, { "epoch": 0.7723601983991978, "grad_norm": 1.6783415079116821, "learning_rate": 2.5966869221248013e-05, "loss": 1.5029, "step": 21567 }, { "epoch": 0.772396010528766, "grad_norm": 1.6194955110549927, "learning_rate": 2.595907237348104e-05, "loss": 1.4862, "step": 21568 }, { "epoch": 0.7724318226583344, "grad_norm": 1.703182339668274, "learning_rate": 2.595127652183924e-05, "loss": 1.3713, "step": 21569 }, { "epoch": 0.7724676347879027, "grad_norm": 1.670957088470459, "learning_rate": 2.5943481666427506e-05, "loss": 1.3126, "step": 21570 }, { "epoch": 0.772503446917471, "grad_norm": 1.6317272186279297, "learning_rate": 2.5935687807350718e-05, "loss": 1.6463, "step": 21571 }, { "epoch": 0.7725392590470392, "grad_norm": 1.457313895225525, "learning_rate": 2.5927894944713695e-05, "loss": 1.2296, "step": 21572 }, { "epoch": 0.7725750711766075, "grad_norm": 2.3845486640930176, "learning_rate": 2.5920103078621294e-05, "loss": 1.4234, "step": 21573 }, { "epoch": 0.7726108833061758, "grad_norm": 2.011991500854492, "learning_rate": 2.591231220917837e-05, "loss": 1.5692, "step": 21574 }, { "epoch": 0.772646695435744, "grad_norm": 1.949367642402649, "learning_rate": 2.59045223364897e-05, "loss": 1.5255, "step": 21575 }, { "epoch": 0.7726825075653124, "grad_norm": 1.472780704498291, "learning_rate": 2.5896733460660138e-05, "loss": 1.5582, "step": 21576 }, { "epoch": 0.7727183196948807, "grad_norm": 1.6741362810134888, "learning_rate": 2.5888945581794377e-05, "loss": 1.4447, "step": 21577 }, { "epoch": 0.772754131824449, "grad_norm": 1.7349146604537964, "learning_rate": 2.5881158699997322e-05, "loss": 1.4366, "step": 21578 }, { "epoch": 0.7727899439540172, "grad_norm": 1.5743865966796875, "learning_rate": 2.5873372815373633e-05, "loss": 1.5798, "step": 21579 }, { "epoch": 0.7728257560835855, "grad_norm": 1.3788039684295654, "learning_rate": 2.5865587928028124e-05, "loss": 1.1381, "step": 21580 }, { "epoch": 0.7728615682131538, "grad_norm": 1.6148918867111206, "learning_rate": 2.5857804038065446e-05, "loss": 1.263, "step": 21581 }, { "epoch": 0.772897380342722, "grad_norm": 1.67681086063385, "learning_rate": 2.585002114559044e-05, "loss": 1.6036, "step": 21582 }, { "epoch": 0.7729331924722904, "grad_norm": 1.5975325107574463, "learning_rate": 2.5842239250707757e-05, "loss": 1.2015, "step": 21583 }, { "epoch": 0.7729690046018587, "grad_norm": 1.4812545776367188, "learning_rate": 2.5834458353522018e-05, "loss": 1.3873, "step": 21584 }, { "epoch": 0.773004816731427, "grad_norm": 1.8217486143112183, "learning_rate": 2.5826678454138044e-05, "loss": 1.6502, "step": 21585 }, { "epoch": 0.7730406288609952, "grad_norm": 1.686937928199768, "learning_rate": 2.5818899552660404e-05, "loss": 1.5596, "step": 21586 }, { "epoch": 0.7730764409905635, "grad_norm": 1.654982328414917, "learning_rate": 2.5811121649193805e-05, "loss": 1.3852, "step": 21587 }, { "epoch": 0.7731122531201318, "grad_norm": 1.7891792058944702, "learning_rate": 2.5803344743842817e-05, "loss": 1.3776, "step": 21588 }, { "epoch": 0.7731480652497, "grad_norm": 1.1618537902832031, "learning_rate": 2.579556883671217e-05, "loss": 1.4942, "step": 21589 }, { "epoch": 0.7731838773792683, "grad_norm": 2.126289129257202, "learning_rate": 2.578779392790641e-05, "loss": 1.4412, "step": 21590 }, { "epoch": 0.7732196895088367, "grad_norm": 1.313543438911438, "learning_rate": 2.5780020017530182e-05, "loss": 1.3064, "step": 21591 }, { "epoch": 0.773255501638405, "grad_norm": 2.4040563106536865, "learning_rate": 2.5772247105688006e-05, "loss": 1.2878, "step": 21592 }, { "epoch": 0.7732913137679732, "grad_norm": 1.7156052589416504, "learning_rate": 2.5764475192484506e-05, "loss": 1.463, "step": 21593 }, { "epoch": 0.7733271258975415, "grad_norm": 1.546714425086975, "learning_rate": 2.5756704278024268e-05, "loss": 1.183, "step": 21594 }, { "epoch": 0.7733629380271098, "grad_norm": 1.4804030656814575, "learning_rate": 2.5748934362411747e-05, "loss": 1.3276, "step": 21595 }, { "epoch": 0.773398750156678, "grad_norm": 1.903746247291565, "learning_rate": 2.574116544575159e-05, "loss": 1.526, "step": 21596 }, { "epoch": 0.7734345622862463, "grad_norm": 1.246662974357605, "learning_rate": 2.573339752814825e-05, "loss": 1.1375, "step": 21597 }, { "epoch": 0.7734703744158147, "grad_norm": 2.333866834640503, "learning_rate": 2.5725630609706264e-05, "loss": 1.7398, "step": 21598 }, { "epoch": 0.773506186545383, "grad_norm": 1.3863903284072876, "learning_rate": 2.571786469053006e-05, "loss": 1.7066, "step": 21599 }, { "epoch": 0.7735419986749512, "grad_norm": 1.4405169486999512, "learning_rate": 2.5710099770724227e-05, "loss": 1.6633, "step": 21600 }, { "epoch": 0.7735778108045195, "grad_norm": 1.563675880432129, "learning_rate": 2.5702335850393166e-05, "loss": 1.2315, "step": 21601 }, { "epoch": 0.7736136229340878, "grad_norm": 1.655320644378662, "learning_rate": 2.5694572929641326e-05, "loss": 1.5362, "step": 21602 }, { "epoch": 0.773649435063656, "grad_norm": 1.73147714138031, "learning_rate": 2.5686811008573142e-05, "loss": 1.2198, "step": 21603 }, { "epoch": 0.7736852471932243, "grad_norm": 1.8005670309066772, "learning_rate": 2.5679050087293067e-05, "loss": 1.3593, "step": 21604 }, { "epoch": 0.7737210593227927, "grad_norm": 1.3524696826934814, "learning_rate": 2.5671290165905537e-05, "loss": 1.5948, "step": 21605 }, { "epoch": 0.7737568714523609, "grad_norm": 1.4053454399108887, "learning_rate": 2.5663531244514892e-05, "loss": 1.4189, "step": 21606 }, { "epoch": 0.7737926835819292, "grad_norm": 1.6903241872787476, "learning_rate": 2.5655773323225552e-05, "loss": 1.6855, "step": 21607 }, { "epoch": 0.7738284957114975, "grad_norm": 2.3848190307617188, "learning_rate": 2.564801640214187e-05, "loss": 1.481, "step": 21608 }, { "epoch": 0.7738643078410657, "grad_norm": 1.8017464876174927, "learning_rate": 2.564026048136826e-05, "loss": 1.7375, "step": 21609 }, { "epoch": 0.773900119970634, "grad_norm": 1.8137153387069702, "learning_rate": 2.5632505561009002e-05, "loss": 1.7897, "step": 21610 }, { "epoch": 0.7739359321002023, "grad_norm": 1.650579810142517, "learning_rate": 2.5624751641168442e-05, "loss": 1.4311, "step": 21611 }, { "epoch": 0.7739717442297707, "grad_norm": 1.6380149126052856, "learning_rate": 2.5616998721950948e-05, "loss": 1.357, "step": 21612 }, { "epoch": 0.7740075563593389, "grad_norm": 1.4498231410980225, "learning_rate": 2.5609246803460764e-05, "loss": 1.6064, "step": 21613 }, { "epoch": 0.7740433684889072, "grad_norm": 1.459731101989746, "learning_rate": 2.5601495885802196e-05, "loss": 1.58, "step": 21614 }, { "epoch": 0.7740791806184755, "grad_norm": 2.7620160579681396, "learning_rate": 2.559374596907954e-05, "loss": 1.3054, "step": 21615 }, { "epoch": 0.7741149927480437, "grad_norm": 1.4486792087554932, "learning_rate": 2.5585997053397083e-05, "loss": 1.7824, "step": 21616 }, { "epoch": 0.774150804877612, "grad_norm": 1.2154338359832764, "learning_rate": 2.5578249138859023e-05, "loss": 1.4483, "step": 21617 }, { "epoch": 0.7741866170071803, "grad_norm": 1.8012681007385254, "learning_rate": 2.5570502225569625e-05, "loss": 1.4442, "step": 21618 }, { "epoch": 0.7742224291367487, "grad_norm": 2.442791700363159, "learning_rate": 2.556275631363314e-05, "loss": 1.7195, "step": 21619 }, { "epoch": 0.7742582412663169, "grad_norm": 1.6836718320846558, "learning_rate": 2.5555011403153715e-05, "loss": 1.3161, "step": 21620 }, { "epoch": 0.7742940533958852, "grad_norm": 1.6451094150543213, "learning_rate": 2.5547267494235595e-05, "loss": 1.1316, "step": 21621 }, { "epoch": 0.7743298655254535, "grad_norm": 1.3877159357070923, "learning_rate": 2.5539524586982944e-05, "loss": 1.1024, "step": 21622 }, { "epoch": 0.7743656776550217, "grad_norm": 1.4425606727600098, "learning_rate": 2.553178268149997e-05, "loss": 1.544, "step": 21623 }, { "epoch": 0.77440148978459, "grad_norm": 1.7422723770141602, "learning_rate": 2.5524041777890783e-05, "loss": 1.1543, "step": 21624 }, { "epoch": 0.7744373019141583, "grad_norm": 1.4674055576324463, "learning_rate": 2.5516301876259542e-05, "loss": 1.3218, "step": 21625 }, { "epoch": 0.7744731140437267, "grad_norm": 1.6906684637069702, "learning_rate": 2.5508562976710416e-05, "loss": 1.2574, "step": 21626 }, { "epoch": 0.7745089261732949, "grad_norm": 1.5787941217422485, "learning_rate": 2.5500825079347458e-05, "loss": 1.1405, "step": 21627 }, { "epoch": 0.7745447383028632, "grad_norm": 2.0668768882751465, "learning_rate": 2.5493088184274795e-05, "loss": 1.4762, "step": 21628 }, { "epoch": 0.7745805504324315, "grad_norm": 2.3916802406311035, "learning_rate": 2.548535229159653e-05, "loss": 1.322, "step": 21629 }, { "epoch": 0.7746163625619997, "grad_norm": 1.847888708114624, "learning_rate": 2.5477617401416765e-05, "loss": 1.4714, "step": 21630 }, { "epoch": 0.774652174691568, "grad_norm": 1.9050281047821045, "learning_rate": 2.5469883513839498e-05, "loss": 1.3579, "step": 21631 }, { "epoch": 0.7746879868211363, "grad_norm": 1.6340993642807007, "learning_rate": 2.5462150628968806e-05, "loss": 1.6059, "step": 21632 }, { "epoch": 0.7747237989507046, "grad_norm": 1.4270821809768677, "learning_rate": 2.5454418746908737e-05, "loss": 1.3574, "step": 21633 }, { "epoch": 0.7747596110802729, "grad_norm": 2.2271409034729004, "learning_rate": 2.544668786776333e-05, "loss": 1.3854, "step": 21634 }, { "epoch": 0.7747954232098412, "grad_norm": 1.591993808746338, "learning_rate": 2.5438957991636546e-05, "loss": 1.3642, "step": 21635 }, { "epoch": 0.7748312353394095, "grad_norm": 1.3219497203826904, "learning_rate": 2.5431229118632406e-05, "loss": 1.3877, "step": 21636 }, { "epoch": 0.7748670474689777, "grad_norm": 1.488739013671875, "learning_rate": 2.542350124885492e-05, "loss": 1.291, "step": 21637 }, { "epoch": 0.774902859598546, "grad_norm": 1.77037513256073, "learning_rate": 2.5415774382407997e-05, "loss": 1.5163, "step": 21638 }, { "epoch": 0.7749386717281143, "grad_norm": 1.768937587738037, "learning_rate": 2.5408048519395622e-05, "loss": 1.3945, "step": 21639 }, { "epoch": 0.7749744838576826, "grad_norm": 1.5960173606872559, "learning_rate": 2.5400323659921744e-05, "loss": 1.361, "step": 21640 }, { "epoch": 0.7750102959872509, "grad_norm": 2.2278168201446533, "learning_rate": 2.539259980409031e-05, "loss": 1.6758, "step": 21641 }, { "epoch": 0.7750461081168192, "grad_norm": 2.10552978515625, "learning_rate": 2.5384876952005177e-05, "loss": 1.4251, "step": 21642 }, { "epoch": 0.7750819202463874, "grad_norm": 1.4312798976898193, "learning_rate": 2.537715510377028e-05, "loss": 1.7312, "step": 21643 }, { "epoch": 0.7751177323759557, "grad_norm": 1.418057918548584, "learning_rate": 2.5369434259489534e-05, "loss": 1.3847, "step": 21644 }, { "epoch": 0.775153544505524, "grad_norm": 1.6209661960601807, "learning_rate": 2.5361714419266757e-05, "loss": 1.4997, "step": 21645 }, { "epoch": 0.7751893566350923, "grad_norm": 1.4104539155960083, "learning_rate": 2.5353995583205824e-05, "loss": 1.4841, "step": 21646 }, { "epoch": 0.7752251687646606, "grad_norm": 1.7481262683868408, "learning_rate": 2.5346277751410607e-05, "loss": 1.3121, "step": 21647 }, { "epoch": 0.7752609808942289, "grad_norm": 1.7740089893341064, "learning_rate": 2.5338560923984954e-05, "loss": 1.8563, "step": 21648 }, { "epoch": 0.7752967930237972, "grad_norm": 1.5107569694519043, "learning_rate": 2.533084510103263e-05, "loss": 1.2749, "step": 21649 }, { "epoch": 0.7753326051533654, "grad_norm": 1.553844690322876, "learning_rate": 2.532313028265746e-05, "loss": 1.6047, "step": 21650 }, { "epoch": 0.7753684172829337, "grad_norm": 1.7860406637191772, "learning_rate": 2.531541646896325e-05, "loss": 1.3046, "step": 21651 }, { "epoch": 0.775404229412502, "grad_norm": 1.328852653503418, "learning_rate": 2.5307703660053805e-05, "loss": 1.031, "step": 21652 }, { "epoch": 0.7754400415420702, "grad_norm": 1.684956669807434, "learning_rate": 2.5299991856032835e-05, "loss": 1.131, "step": 21653 }, { "epoch": 0.7754758536716386, "grad_norm": 1.5441877841949463, "learning_rate": 2.5292281057004108e-05, "loss": 1.5972, "step": 21654 }, { "epoch": 0.7755116658012069, "grad_norm": 2.099240303039551, "learning_rate": 2.528457126307141e-05, "loss": 1.5017, "step": 21655 }, { "epoch": 0.7755474779307752, "grad_norm": 1.9487109184265137, "learning_rate": 2.5276862474338404e-05, "loss": 1.5184, "step": 21656 }, { "epoch": 0.7755832900603434, "grad_norm": 1.8288156986236572, "learning_rate": 2.5269154690908827e-05, "loss": 1.393, "step": 21657 }, { "epoch": 0.7756191021899117, "grad_norm": 1.4676835536956787, "learning_rate": 2.526144791288637e-05, "loss": 1.2301, "step": 21658 }, { "epoch": 0.77565491431948, "grad_norm": 1.6494859457015991, "learning_rate": 2.525374214037476e-05, "loss": 1.4331, "step": 21659 }, { "epoch": 0.7756907264490482, "grad_norm": 2.2830381393432617, "learning_rate": 2.5246037373477606e-05, "loss": 1.47, "step": 21660 }, { "epoch": 0.7757265385786166, "grad_norm": 1.7719887495040894, "learning_rate": 2.523833361229859e-05, "loss": 1.7523, "step": 21661 }, { "epoch": 0.7757623507081849, "grad_norm": 1.495537519454956, "learning_rate": 2.5230630856941394e-05, "loss": 1.2791, "step": 21662 }, { "epoch": 0.7757981628377532, "grad_norm": 1.52877676486969, "learning_rate": 2.5222929107509584e-05, "loss": 1.5248, "step": 21663 }, { "epoch": 0.7758339749673214, "grad_norm": 1.7707349061965942, "learning_rate": 2.5215228364106835e-05, "loss": 1.3937, "step": 21664 }, { "epoch": 0.7758697870968897, "grad_norm": 1.9539955854415894, "learning_rate": 2.5207528626836662e-05, "loss": 1.7118, "step": 21665 }, { "epoch": 0.775905599226458, "grad_norm": 1.8302944898605347, "learning_rate": 2.5199829895802775e-05, "loss": 1.5033, "step": 21666 }, { "epoch": 0.7759414113560262, "grad_norm": 2.0510201454162598, "learning_rate": 2.519213217110866e-05, "loss": 1.6254, "step": 21667 }, { "epoch": 0.7759772234855946, "grad_norm": 2.2383663654327393, "learning_rate": 2.5184435452857913e-05, "loss": 1.6945, "step": 21668 }, { "epoch": 0.7760130356151629, "grad_norm": 1.3355016708374023, "learning_rate": 2.517673974115409e-05, "loss": 1.1414, "step": 21669 }, { "epoch": 0.7760488477447312, "grad_norm": 1.586307406425476, "learning_rate": 2.5169045036100736e-05, "loss": 1.6073, "step": 21670 }, { "epoch": 0.7760846598742994, "grad_norm": 1.2950676679611206, "learning_rate": 2.5161351337801363e-05, "loss": 1.399, "step": 21671 }, { "epoch": 0.7761204720038677, "grad_norm": 1.7637110948562622, "learning_rate": 2.5153658646359412e-05, "loss": 1.2684, "step": 21672 }, { "epoch": 0.776156284133436, "grad_norm": 1.1106634140014648, "learning_rate": 2.51459669618785e-05, "loss": 1.3017, "step": 21673 }, { "epoch": 0.7761920962630042, "grad_norm": 1.6944717168807983, "learning_rate": 2.5138276284462016e-05, "loss": 1.1044, "step": 21674 }, { "epoch": 0.7762279083925726, "grad_norm": 1.657623291015625, "learning_rate": 2.513058661421349e-05, "loss": 1.4931, "step": 21675 }, { "epoch": 0.7762637205221409, "grad_norm": 1.4893158674240112, "learning_rate": 2.512289795123629e-05, "loss": 1.48, "step": 21676 }, { "epoch": 0.7762995326517091, "grad_norm": 1.7651742696762085, "learning_rate": 2.5115210295633974e-05, "loss": 1.2103, "step": 21677 }, { "epoch": 0.7763353447812774, "grad_norm": 2.091895580291748, "learning_rate": 2.5107523647509877e-05, "loss": 1.4103, "step": 21678 }, { "epoch": 0.7763711569108457, "grad_norm": 1.813442587852478, "learning_rate": 2.5099838006967446e-05, "loss": 1.3978, "step": 21679 }, { "epoch": 0.776406969040414, "grad_norm": 1.8207119703292847, "learning_rate": 2.5092153374110107e-05, "loss": 1.2788, "step": 21680 }, { "epoch": 0.7764427811699822, "grad_norm": 1.47296142578125, "learning_rate": 2.5084469749041185e-05, "loss": 1.4844, "step": 21681 }, { "epoch": 0.7764785932995506, "grad_norm": 1.7010948657989502, "learning_rate": 2.5076787131864132e-05, "loss": 1.5089, "step": 21682 }, { "epoch": 0.7765144054291189, "grad_norm": 2.0861148834228516, "learning_rate": 2.506910552268219e-05, "loss": 1.2157, "step": 21683 }, { "epoch": 0.7765502175586871, "grad_norm": 1.6660261154174805, "learning_rate": 2.5061424921598853e-05, "loss": 1.4986, "step": 21684 }, { "epoch": 0.7765860296882554, "grad_norm": 1.3567243814468384, "learning_rate": 2.5053745328717336e-05, "loss": 1.2758, "step": 21685 }, { "epoch": 0.7766218418178237, "grad_norm": 1.429267406463623, "learning_rate": 2.504606674414104e-05, "loss": 1.2731, "step": 21686 }, { "epoch": 0.776657653947392, "grad_norm": 1.6606378555297852, "learning_rate": 2.5038389167973177e-05, "loss": 1.4486, "step": 21687 }, { "epoch": 0.7766934660769602, "grad_norm": 1.906919002532959, "learning_rate": 2.5030712600317143e-05, "loss": 1.6785, "step": 21688 }, { "epoch": 0.7767292782065286, "grad_norm": 1.29216468334198, "learning_rate": 2.5023037041276175e-05, "loss": 1.4627, "step": 21689 }, { "epoch": 0.7767650903360969, "grad_norm": 2.332552433013916, "learning_rate": 2.5015362490953497e-05, "loss": 1.4408, "step": 21690 }, { "epoch": 0.7768009024656651, "grad_norm": 1.672184705734253, "learning_rate": 2.5007688949452402e-05, "loss": 1.2021, "step": 21691 }, { "epoch": 0.7768367145952334, "grad_norm": 1.5943087339401245, "learning_rate": 2.5000016416876103e-05, "loss": 1.4337, "step": 21692 }, { "epoch": 0.7768725267248017, "grad_norm": 1.5827921628952026, "learning_rate": 2.499234489332788e-05, "loss": 1.4747, "step": 21693 }, { "epoch": 0.7769083388543699, "grad_norm": 1.874254584312439, "learning_rate": 2.4984674378910845e-05, "loss": 1.3405, "step": 21694 }, { "epoch": 0.7769441509839382, "grad_norm": 1.8608965873718262, "learning_rate": 2.4977004873728315e-05, "loss": 1.196, "step": 21695 }, { "epoch": 0.7769799631135066, "grad_norm": 2.278493881225586, "learning_rate": 2.496933637788338e-05, "loss": 1.4088, "step": 21696 }, { "epoch": 0.7770157752430749, "grad_norm": 1.8424022197723389, "learning_rate": 2.496166889147926e-05, "loss": 1.3471, "step": 21697 }, { "epoch": 0.7770515873726431, "grad_norm": 1.493421196937561, "learning_rate": 2.495400241461907e-05, "loss": 1.5338, "step": 21698 }, { "epoch": 0.7770873995022114, "grad_norm": 1.4196568727493286, "learning_rate": 2.494633694740598e-05, "loss": 1.4372, "step": 21699 }, { "epoch": 0.7771232116317797, "grad_norm": 1.808488130569458, "learning_rate": 2.4938672489943138e-05, "loss": 1.3776, "step": 21700 }, { "epoch": 0.7771590237613479, "grad_norm": 1.3471368551254272, "learning_rate": 2.493100904233361e-05, "loss": 1.3275, "step": 21701 }, { "epoch": 0.7771948358909162, "grad_norm": 1.5173574686050415, "learning_rate": 2.4923346604680532e-05, "loss": 1.1689, "step": 21702 }, { "epoch": 0.7772306480204846, "grad_norm": 1.4176759719848633, "learning_rate": 2.4915685177086967e-05, "loss": 1.2773, "step": 21703 }, { "epoch": 0.7772664601500529, "grad_norm": 1.5612668991088867, "learning_rate": 2.4908024759656046e-05, "loss": 1.5212, "step": 21704 }, { "epoch": 0.7773022722796211, "grad_norm": 1.379515528678894, "learning_rate": 2.490036535249073e-05, "loss": 1.3418, "step": 21705 }, { "epoch": 0.7773380844091894, "grad_norm": 1.7329143285751343, "learning_rate": 2.489270695569418e-05, "loss": 1.4872, "step": 21706 }, { "epoch": 0.7773738965387577, "grad_norm": 1.6147024631500244, "learning_rate": 2.4885049569369378e-05, "loss": 1.492, "step": 21707 }, { "epoch": 0.7774097086683259, "grad_norm": 1.3835676908493042, "learning_rate": 2.4877393193619315e-05, "loss": 1.2585, "step": 21708 }, { "epoch": 0.7774455207978942, "grad_norm": 1.4012019634246826, "learning_rate": 2.4869737828547024e-05, "loss": 1.4471, "step": 21709 }, { "epoch": 0.7774813329274626, "grad_norm": 1.916560173034668, "learning_rate": 2.4862083474255503e-05, "loss": 1.3948, "step": 21710 }, { "epoch": 0.7775171450570308, "grad_norm": 2.5519630908966064, "learning_rate": 2.485443013084775e-05, "loss": 1.78, "step": 21711 }, { "epoch": 0.7775529571865991, "grad_norm": 1.7727234363555908, "learning_rate": 2.484677779842669e-05, "loss": 1.4787, "step": 21712 }, { "epoch": 0.7775887693161674, "grad_norm": 1.8907781839370728, "learning_rate": 2.4839126477095287e-05, "loss": 1.3687, "step": 21713 }, { "epoch": 0.7776245814457357, "grad_norm": 1.9135130643844604, "learning_rate": 2.4831476166956515e-05, "loss": 1.402, "step": 21714 }, { "epoch": 0.7776603935753039, "grad_norm": 1.6929961442947388, "learning_rate": 2.482382686811324e-05, "loss": 1.4129, "step": 21715 }, { "epoch": 0.7776962057048722, "grad_norm": 1.5624363422393799, "learning_rate": 2.4816178580668415e-05, "loss": 1.4647, "step": 21716 }, { "epoch": 0.7777320178344406, "grad_norm": 1.6534024477005005, "learning_rate": 2.4808531304724913e-05, "loss": 1.497, "step": 21717 }, { "epoch": 0.7777678299640088, "grad_norm": 1.207729458808899, "learning_rate": 2.4800885040385668e-05, "loss": 1.4953, "step": 21718 }, { "epoch": 0.7778036420935771, "grad_norm": 1.8488044738769531, "learning_rate": 2.4793239787753487e-05, "loss": 1.4934, "step": 21719 }, { "epoch": 0.7778394542231454, "grad_norm": 1.6745057106018066, "learning_rate": 2.478559554693125e-05, "loss": 1.3223, "step": 21720 }, { "epoch": 0.7778752663527136, "grad_norm": 1.441912293434143, "learning_rate": 2.4777952318021814e-05, "loss": 1.5393, "step": 21721 }, { "epoch": 0.7779110784822819, "grad_norm": 1.9352595806121826, "learning_rate": 2.4770310101128026e-05, "loss": 1.526, "step": 21722 }, { "epoch": 0.7779468906118502, "grad_norm": 1.4422829151153564, "learning_rate": 2.476266889635265e-05, "loss": 1.6112, "step": 21723 }, { "epoch": 0.7779827027414186, "grad_norm": 1.7226628065109253, "learning_rate": 2.475502870379851e-05, "loss": 1.1627, "step": 21724 }, { "epoch": 0.7780185148709868, "grad_norm": 2.2136683464050293, "learning_rate": 2.474738952356842e-05, "loss": 1.2243, "step": 21725 }, { "epoch": 0.7780543270005551, "grad_norm": 1.4644486904144287, "learning_rate": 2.4739751355765116e-05, "loss": 1.4847, "step": 21726 }, { "epoch": 0.7780901391301234, "grad_norm": 1.2714347839355469, "learning_rate": 2.4732114200491386e-05, "loss": 1.321, "step": 21727 }, { "epoch": 0.7781259512596916, "grad_norm": 1.5196048021316528, "learning_rate": 2.4724478057849965e-05, "loss": 1.4103, "step": 21728 }, { "epoch": 0.7781617633892599, "grad_norm": 1.713654637336731, "learning_rate": 2.4716842927943617e-05, "loss": 1.4538, "step": 21729 }, { "epoch": 0.7781975755188282, "grad_norm": 1.7022624015808105, "learning_rate": 2.4709208810875017e-05, "loss": 1.7523, "step": 21730 }, { "epoch": 0.7782333876483966, "grad_norm": 1.4414794445037842, "learning_rate": 2.4701575706746882e-05, "loss": 1.2275, "step": 21731 }, { "epoch": 0.7782691997779648, "grad_norm": 1.515198826789856, "learning_rate": 2.4693943615661963e-05, "loss": 1.3186, "step": 21732 }, { "epoch": 0.7783050119075331, "grad_norm": 1.7945395708084106, "learning_rate": 2.4686312537722855e-05, "loss": 1.4992, "step": 21733 }, { "epoch": 0.7783408240371014, "grad_norm": 1.8566440343856812, "learning_rate": 2.4678682473032267e-05, "loss": 1.4913, "step": 21734 }, { "epoch": 0.7783766361666696, "grad_norm": 1.767237901687622, "learning_rate": 2.4671053421692845e-05, "loss": 1.3864, "step": 21735 }, { "epoch": 0.7784124482962379, "grad_norm": 1.450467586517334, "learning_rate": 2.466342538380727e-05, "loss": 1.5967, "step": 21736 }, { "epoch": 0.7784482604258062, "grad_norm": 2.0914173126220703, "learning_rate": 2.46557983594781e-05, "loss": 1.4615, "step": 21737 }, { "epoch": 0.7784840725553746, "grad_norm": 1.6237757205963135, "learning_rate": 2.4648172348807963e-05, "loss": 1.4614, "step": 21738 }, { "epoch": 0.7785198846849428, "grad_norm": 1.7426936626434326, "learning_rate": 2.464054735189948e-05, "loss": 1.3176, "step": 21739 }, { "epoch": 0.7785556968145111, "grad_norm": 1.7049877643585205, "learning_rate": 2.4632923368855254e-05, "loss": 1.2699, "step": 21740 }, { "epoch": 0.7785915089440794, "grad_norm": 1.7951089143753052, "learning_rate": 2.4625300399777806e-05, "loss": 1.4016, "step": 21741 }, { "epoch": 0.7786273210736476, "grad_norm": 1.667818546295166, "learning_rate": 2.46176784447697e-05, "loss": 1.3115, "step": 21742 }, { "epoch": 0.7786631332032159, "grad_norm": 1.8031182289123535, "learning_rate": 2.4610057503933537e-05, "loss": 1.5707, "step": 21743 }, { "epoch": 0.7786989453327842, "grad_norm": 1.7315683364868164, "learning_rate": 2.4602437577371763e-05, "loss": 1.3129, "step": 21744 }, { "epoch": 0.7787347574623525, "grad_norm": 1.3032207489013672, "learning_rate": 2.4594818665186937e-05, "loss": 1.3991, "step": 21745 }, { "epoch": 0.7787705695919208, "grad_norm": 1.8537158966064453, "learning_rate": 2.4587200767481565e-05, "loss": 1.1288, "step": 21746 }, { "epoch": 0.7788063817214891, "grad_norm": 2.1073696613311768, "learning_rate": 2.457958388435816e-05, "loss": 1.5075, "step": 21747 }, { "epoch": 0.7788421938510574, "grad_norm": 1.4380515813827515, "learning_rate": 2.4571968015919144e-05, "loss": 1.2178, "step": 21748 }, { "epoch": 0.7788780059806256, "grad_norm": 1.5372939109802246, "learning_rate": 2.4564353162266996e-05, "loss": 1.037, "step": 21749 }, { "epoch": 0.7789138181101939, "grad_norm": 1.7316334247589111, "learning_rate": 2.4556739323504195e-05, "loss": 1.3958, "step": 21750 }, { "epoch": 0.7789496302397622, "grad_norm": 2.508051633834839, "learning_rate": 2.454912649973313e-05, "loss": 1.6857, "step": 21751 }, { "epoch": 0.7789854423693305, "grad_norm": 1.7578423023223877, "learning_rate": 2.4541514691056245e-05, "loss": 1.3476, "step": 21752 }, { "epoch": 0.7790212544988988, "grad_norm": 2.091949462890625, "learning_rate": 2.453390389757595e-05, "loss": 1.594, "step": 21753 }, { "epoch": 0.7790570666284671, "grad_norm": 1.5613199472427368, "learning_rate": 2.4526294119394653e-05, "loss": 1.5924, "step": 21754 }, { "epoch": 0.7790928787580353, "grad_norm": 1.5921406745910645, "learning_rate": 2.451868535661469e-05, "loss": 1.3637, "step": 21755 }, { "epoch": 0.7791286908876036, "grad_norm": 1.6087360382080078, "learning_rate": 2.451107760933845e-05, "loss": 1.5249, "step": 21756 }, { "epoch": 0.7791645030171719, "grad_norm": 1.8573392629623413, "learning_rate": 2.4503470877668287e-05, "loss": 1.5981, "step": 21757 }, { "epoch": 0.7792003151467402, "grad_norm": 1.5626813173294067, "learning_rate": 2.4495865161706567e-05, "loss": 1.3329, "step": 21758 }, { "epoch": 0.7792361272763085, "grad_norm": 2.4538016319274902, "learning_rate": 2.448826046155559e-05, "loss": 1.4858, "step": 21759 }, { "epoch": 0.7792719394058768, "grad_norm": 1.3803672790527344, "learning_rate": 2.4480656777317613e-05, "loss": 1.4366, "step": 21760 }, { "epoch": 0.7793077515354451, "grad_norm": 1.7788008451461792, "learning_rate": 2.447305410909504e-05, "loss": 1.3099, "step": 21761 }, { "epoch": 0.7793435636650133, "grad_norm": 1.5982377529144287, "learning_rate": 2.4465452456990067e-05, "loss": 1.4482, "step": 21762 }, { "epoch": 0.7793793757945816, "grad_norm": 1.2722399234771729, "learning_rate": 2.4457851821105006e-05, "loss": 1.6228, "step": 21763 }, { "epoch": 0.7794151879241499, "grad_norm": 1.843069314956665, "learning_rate": 2.4450252201542102e-05, "loss": 1.3791, "step": 21764 }, { "epoch": 0.7794510000537181, "grad_norm": 1.3950681686401367, "learning_rate": 2.444265359840363e-05, "loss": 1.3212, "step": 21765 }, { "epoch": 0.7794868121832865, "grad_norm": 1.4858050346374512, "learning_rate": 2.4435056011791768e-05, "loss": 1.3646, "step": 21766 }, { "epoch": 0.7795226243128548, "grad_norm": 1.639343023300171, "learning_rate": 2.4427459441808754e-05, "loss": 1.739, "step": 21767 }, { "epoch": 0.7795584364424231, "grad_norm": 1.5904186964035034, "learning_rate": 2.4419863888556815e-05, "loss": 1.3763, "step": 21768 }, { "epoch": 0.7795942485719913, "grad_norm": 1.7701135873794556, "learning_rate": 2.4412269352138097e-05, "loss": 1.5168, "step": 21769 }, { "epoch": 0.7796300607015596, "grad_norm": 1.9620040655136108, "learning_rate": 2.4404675832654812e-05, "loss": 1.421, "step": 21770 }, { "epoch": 0.7796658728311279, "grad_norm": 1.3738888502120972, "learning_rate": 2.4397083330209046e-05, "loss": 1.1076, "step": 21771 }, { "epoch": 0.7797016849606961, "grad_norm": 1.2282766103744507, "learning_rate": 2.438949184490307e-05, "loss": 1.3747, "step": 21772 }, { "epoch": 0.7797374970902645, "grad_norm": 1.7531875371932983, "learning_rate": 2.438190137683891e-05, "loss": 1.4378, "step": 21773 }, { "epoch": 0.7797733092198328, "grad_norm": 1.9136475324630737, "learning_rate": 2.4374311926118765e-05, "loss": 1.4021, "step": 21774 }, { "epoch": 0.7798091213494011, "grad_norm": 1.4294815063476562, "learning_rate": 2.4366723492844644e-05, "loss": 1.5454, "step": 21775 }, { "epoch": 0.7798449334789693, "grad_norm": 1.2743984460830688, "learning_rate": 2.435913607711876e-05, "loss": 1.4154, "step": 21776 }, { "epoch": 0.7798807456085376, "grad_norm": 1.6209650039672852, "learning_rate": 2.4351549679043118e-05, "loss": 1.8038, "step": 21777 }, { "epoch": 0.7799165577381059, "grad_norm": 2.175903081893921, "learning_rate": 2.4343964298719746e-05, "loss": 1.2831, "step": 21778 }, { "epoch": 0.7799523698676741, "grad_norm": 1.389844298362732, "learning_rate": 2.4336379936250808e-05, "loss": 1.459, "step": 21779 }, { "epoch": 0.7799881819972425, "grad_norm": 1.7427324056625366, "learning_rate": 2.4328796591738236e-05, "loss": 1.4667, "step": 21780 }, { "epoch": 0.7800239941268108, "grad_norm": 2.2379322052001953, "learning_rate": 2.432121426528414e-05, "loss": 1.25, "step": 21781 }, { "epoch": 0.780059806256379, "grad_norm": 1.569846510887146, "learning_rate": 2.431363295699042e-05, "loss": 1.5934, "step": 21782 }, { "epoch": 0.7800956183859473, "grad_norm": 1.7577472925186157, "learning_rate": 2.43060526669592e-05, "loss": 1.2713, "step": 21783 }, { "epoch": 0.7801314305155156, "grad_norm": 1.8201638460159302, "learning_rate": 2.4298473395292378e-05, "loss": 1.416, "step": 21784 }, { "epoch": 0.7801672426450839, "grad_norm": 2.0842766761779785, "learning_rate": 2.4290895142091974e-05, "loss": 1.3904, "step": 21785 }, { "epoch": 0.7802030547746521, "grad_norm": 1.799854040145874, "learning_rate": 2.428331790745989e-05, "loss": 1.6248, "step": 21786 }, { "epoch": 0.7802388669042205, "grad_norm": 1.5186867713928223, "learning_rate": 2.42757416914981e-05, "loss": 1.6752, "step": 21787 }, { "epoch": 0.7802746790337888, "grad_norm": 1.4737837314605713, "learning_rate": 2.4268166494308553e-05, "loss": 1.4571, "step": 21788 }, { "epoch": 0.780310491163357, "grad_norm": 1.4512406587600708, "learning_rate": 2.426059231599308e-05, "loss": 1.2519, "step": 21789 }, { "epoch": 0.7803463032929253, "grad_norm": 1.9012922048568726, "learning_rate": 2.42530191566537e-05, "loss": 1.3534, "step": 21790 }, { "epoch": 0.7803821154224936, "grad_norm": 1.6925349235534668, "learning_rate": 2.4245447016392207e-05, "loss": 1.4462, "step": 21791 }, { "epoch": 0.7804179275520619, "grad_norm": 1.83090078830719, "learning_rate": 2.4237875895310548e-05, "loss": 1.4723, "step": 21792 }, { "epoch": 0.7804537396816301, "grad_norm": 1.7170168161392212, "learning_rate": 2.4230305793510478e-05, "loss": 1.6161, "step": 21793 }, { "epoch": 0.7804895518111985, "grad_norm": 1.244886040687561, "learning_rate": 2.4222736711093964e-05, "loss": 1.4318, "step": 21794 }, { "epoch": 0.7805253639407668, "grad_norm": 1.9996095895767212, "learning_rate": 2.4215168648162778e-05, "loss": 1.3049, "step": 21795 }, { "epoch": 0.780561176070335, "grad_norm": 1.6705350875854492, "learning_rate": 2.420760160481872e-05, "loss": 1.4626, "step": 21796 }, { "epoch": 0.7805969881999033, "grad_norm": 1.3955320119857788, "learning_rate": 2.4200035581163614e-05, "loss": 1.4416, "step": 21797 }, { "epoch": 0.7806328003294716, "grad_norm": 2.089934825897217, "learning_rate": 2.4192470577299263e-05, "loss": 1.2793, "step": 21798 }, { "epoch": 0.7806686124590398, "grad_norm": 1.3678085803985596, "learning_rate": 2.418490659332746e-05, "loss": 1.2663, "step": 21799 }, { "epoch": 0.7807044245886081, "grad_norm": 1.447238564491272, "learning_rate": 2.4177343629349912e-05, "loss": 1.412, "step": 21800 }, { "epoch": 0.7807402367181765, "grad_norm": 2.202078342437744, "learning_rate": 2.4169781685468407e-05, "loss": 1.4239, "step": 21801 }, { "epoch": 0.7807760488477448, "grad_norm": 1.9966267347335815, "learning_rate": 2.416222076178467e-05, "loss": 1.31, "step": 21802 }, { "epoch": 0.780811860977313, "grad_norm": 2.0359063148498535, "learning_rate": 2.4154660858400456e-05, "loss": 1.5723, "step": 21803 }, { "epoch": 0.7808476731068813, "grad_norm": 1.8663406372070312, "learning_rate": 2.414710197541743e-05, "loss": 1.5869, "step": 21804 }, { "epoch": 0.7808834852364496, "grad_norm": 1.834692120552063, "learning_rate": 2.4139544112937283e-05, "loss": 1.5305, "step": 21805 }, { "epoch": 0.7809192973660178, "grad_norm": 1.34640634059906, "learning_rate": 2.413198727106176e-05, "loss": 1.6293, "step": 21806 }, { "epoch": 0.7809551094955861, "grad_norm": 2.504323959350586, "learning_rate": 2.412443144989246e-05, "loss": 1.2204, "step": 21807 }, { "epoch": 0.7809909216251545, "grad_norm": 2.4626822471618652, "learning_rate": 2.411687664953106e-05, "loss": 1.5398, "step": 21808 }, { "epoch": 0.7810267337547228, "grad_norm": 1.4830766916275024, "learning_rate": 2.41093228700792e-05, "loss": 1.4997, "step": 21809 }, { "epoch": 0.781062545884291, "grad_norm": 1.2905964851379395, "learning_rate": 2.4101770111638534e-05, "loss": 1.2161, "step": 21810 }, { "epoch": 0.7810983580138593, "grad_norm": 1.5993380546569824, "learning_rate": 2.409421837431063e-05, "loss": 1.3006, "step": 21811 }, { "epoch": 0.7811341701434276, "grad_norm": 1.530030608177185, "learning_rate": 2.4086667658197093e-05, "loss": 1.3426, "step": 21812 }, { "epoch": 0.7811699822729958, "grad_norm": 1.3062310218811035, "learning_rate": 2.4079117963399554e-05, "loss": 1.4719, "step": 21813 }, { "epoch": 0.7812057944025641, "grad_norm": 1.582737684249878, "learning_rate": 2.4071569290019535e-05, "loss": 1.2686, "step": 21814 }, { "epoch": 0.7812416065321325, "grad_norm": 1.5829455852508545, "learning_rate": 2.4064021638158596e-05, "loss": 1.7681, "step": 21815 }, { "epoch": 0.7812774186617008, "grad_norm": 1.5450665950775146, "learning_rate": 2.40564750079183e-05, "loss": 1.3582, "step": 21816 }, { "epoch": 0.781313230791269, "grad_norm": 1.877021312713623, "learning_rate": 2.404892939940021e-05, "loss": 1.5996, "step": 21817 }, { "epoch": 0.7813490429208373, "grad_norm": 1.5125741958618164, "learning_rate": 2.404138481270577e-05, "loss": 1.4883, "step": 21818 }, { "epoch": 0.7813848550504056, "grad_norm": 1.710242748260498, "learning_rate": 2.4033841247936517e-05, "loss": 1.5537, "step": 21819 }, { "epoch": 0.7814206671799738, "grad_norm": 2.031437397003174, "learning_rate": 2.4026298705193972e-05, "loss": 1.6633, "step": 21820 }, { "epoch": 0.7814564793095421, "grad_norm": 1.5349557399749756, "learning_rate": 2.4018757184579545e-05, "loss": 1.2571, "step": 21821 }, { "epoch": 0.7814922914391105, "grad_norm": 1.6792068481445312, "learning_rate": 2.401121668619474e-05, "loss": 1.5264, "step": 21822 }, { "epoch": 0.7815281035686787, "grad_norm": 1.345013976097107, "learning_rate": 2.4003677210140986e-05, "loss": 1.1816, "step": 21823 }, { "epoch": 0.781563915698247, "grad_norm": 1.3718589544296265, "learning_rate": 2.3996138756519758e-05, "loss": 1.336, "step": 21824 }, { "epoch": 0.7815997278278153, "grad_norm": 1.577532410621643, "learning_rate": 2.3988601325432415e-05, "loss": 1.72, "step": 21825 }, { "epoch": 0.7816355399573836, "grad_norm": 1.7576181888580322, "learning_rate": 2.39810649169804e-05, "loss": 1.5281, "step": 21826 }, { "epoch": 0.7816713520869518, "grad_norm": 1.4686623811721802, "learning_rate": 2.3973529531265095e-05, "loss": 1.1936, "step": 21827 }, { "epoch": 0.7817071642165201, "grad_norm": 1.418623685836792, "learning_rate": 2.396599516838791e-05, "loss": 1.2775, "step": 21828 }, { "epoch": 0.7817429763460885, "grad_norm": 1.3246876001358032, "learning_rate": 2.3958461828450164e-05, "loss": 1.3822, "step": 21829 }, { "epoch": 0.7817787884756567, "grad_norm": 1.7767356634140015, "learning_rate": 2.3950929511553223e-05, "loss": 1.4197, "step": 21830 }, { "epoch": 0.781814600605225, "grad_norm": 1.667604684829712, "learning_rate": 2.3943398217798452e-05, "loss": 1.3056, "step": 21831 }, { "epoch": 0.7818504127347933, "grad_norm": 1.5106874704360962, "learning_rate": 2.393586794728713e-05, "loss": 1.2763, "step": 21832 }, { "epoch": 0.7818862248643615, "grad_norm": 1.9181849956512451, "learning_rate": 2.3928338700120578e-05, "loss": 1.4003, "step": 21833 }, { "epoch": 0.7819220369939298, "grad_norm": 1.5137401819229126, "learning_rate": 2.3920810476400112e-05, "loss": 1.6359, "step": 21834 }, { "epoch": 0.7819578491234981, "grad_norm": 1.4727210998535156, "learning_rate": 2.391328327622704e-05, "loss": 1.39, "step": 21835 }, { "epoch": 0.7819936612530665, "grad_norm": 1.6446338891983032, "learning_rate": 2.3905757099702564e-05, "loss": 1.6383, "step": 21836 }, { "epoch": 0.7820294733826347, "grad_norm": 1.7629797458648682, "learning_rate": 2.3898231946927963e-05, "loss": 1.5718, "step": 21837 }, { "epoch": 0.782065285512203, "grad_norm": 1.1665034294128418, "learning_rate": 2.3890707818004522e-05, "loss": 1.5076, "step": 21838 }, { "epoch": 0.7821010976417713, "grad_norm": 1.5729949474334717, "learning_rate": 2.3883184713033414e-05, "loss": 1.2453, "step": 21839 }, { "epoch": 0.7821369097713395, "grad_norm": 1.3064537048339844, "learning_rate": 2.387566263211586e-05, "loss": 1.4201, "step": 21840 }, { "epoch": 0.7821727219009078, "grad_norm": 1.7350980043411255, "learning_rate": 2.3868141575353077e-05, "loss": 1.461, "step": 21841 }, { "epoch": 0.7822085340304761, "grad_norm": 1.9520295858383179, "learning_rate": 2.3860621542846273e-05, "loss": 1.5801, "step": 21842 }, { "epoch": 0.7822443461600445, "grad_norm": 1.5008251667022705, "learning_rate": 2.3853102534696557e-05, "loss": 0.9946, "step": 21843 }, { "epoch": 0.7822801582896127, "grad_norm": 1.6487131118774414, "learning_rate": 2.384558455100514e-05, "loss": 1.537, "step": 21844 }, { "epoch": 0.782315970419181, "grad_norm": 1.7409266233444214, "learning_rate": 2.3838067591873136e-05, "loss": 1.2899, "step": 21845 }, { "epoch": 0.7823517825487493, "grad_norm": 1.6287227869033813, "learning_rate": 2.3830551657401723e-05, "loss": 1.2628, "step": 21846 }, { "epoch": 0.7823875946783175, "grad_norm": 1.5295032262802124, "learning_rate": 2.3823036747691995e-05, "loss": 1.081, "step": 21847 }, { "epoch": 0.7824234068078858, "grad_norm": 1.78057062625885, "learning_rate": 2.3815522862844985e-05, "loss": 1.4897, "step": 21848 }, { "epoch": 0.7824592189374541, "grad_norm": 1.901196002960205, "learning_rate": 2.3808010002961902e-05, "loss": 1.394, "step": 21849 }, { "epoch": 0.7824950310670225, "grad_norm": 1.6514678001403809, "learning_rate": 2.3800498168143726e-05, "loss": 1.161, "step": 21850 }, { "epoch": 0.7825308431965907, "grad_norm": 1.6724635362625122, "learning_rate": 2.379298735849156e-05, "loss": 1.4317, "step": 21851 }, { "epoch": 0.782566655326159, "grad_norm": 1.8803189992904663, "learning_rate": 2.378547757410645e-05, "loss": 1.1883, "step": 21852 }, { "epoch": 0.7826024674557273, "grad_norm": 1.2810298204421997, "learning_rate": 2.377796881508947e-05, "loss": 1.2568, "step": 21853 }, { "epoch": 0.7826382795852955, "grad_norm": 1.5125658512115479, "learning_rate": 2.3770461081541563e-05, "loss": 1.3659, "step": 21854 }, { "epoch": 0.7826740917148638, "grad_norm": 2.5842549800872803, "learning_rate": 2.3762954373563763e-05, "loss": 1.5197, "step": 21855 }, { "epoch": 0.7827099038444321, "grad_norm": 1.7202882766723633, "learning_rate": 2.375544869125711e-05, "loss": 1.4987, "step": 21856 }, { "epoch": 0.7827457159740004, "grad_norm": 1.7006173133850098, "learning_rate": 2.3747944034722524e-05, "loss": 1.3801, "step": 21857 }, { "epoch": 0.7827815281035687, "grad_norm": 1.8149973154067993, "learning_rate": 2.3740440404061015e-05, "loss": 1.214, "step": 21858 }, { "epoch": 0.782817340233137, "grad_norm": 1.8355730772018433, "learning_rate": 2.3732937799373455e-05, "loss": 1.286, "step": 21859 }, { "epoch": 0.7828531523627053, "grad_norm": 1.700737476348877, "learning_rate": 2.37254362207609e-05, "loss": 1.4152, "step": 21860 }, { "epoch": 0.7828889644922735, "grad_norm": 1.5340129137039185, "learning_rate": 2.3717935668324186e-05, "loss": 1.4033, "step": 21861 }, { "epoch": 0.7829247766218418, "grad_norm": 1.533355474472046, "learning_rate": 2.371043614216425e-05, "loss": 1.205, "step": 21862 }, { "epoch": 0.7829605887514101, "grad_norm": 2.0897789001464844, "learning_rate": 2.3702937642381985e-05, "loss": 1.3425, "step": 21863 }, { "epoch": 0.7829964008809784, "grad_norm": 1.8037362098693848, "learning_rate": 2.369544016907831e-05, "loss": 1.2512, "step": 21864 }, { "epoch": 0.7830322130105467, "grad_norm": 2.2047224044799805, "learning_rate": 2.3687943722354056e-05, "loss": 1.8162, "step": 21865 }, { "epoch": 0.783068025140115, "grad_norm": 1.6913021802902222, "learning_rate": 2.3680448302310032e-05, "loss": 1.2962, "step": 21866 }, { "epoch": 0.7831038372696832, "grad_norm": 1.605500340461731, "learning_rate": 2.367295390904719e-05, "loss": 1.8098, "step": 21867 }, { "epoch": 0.7831396493992515, "grad_norm": 2.315788984298706, "learning_rate": 2.3665460542666263e-05, "loss": 1.3339, "step": 21868 }, { "epoch": 0.7831754615288198, "grad_norm": 2.22713041305542, "learning_rate": 2.3657968203268133e-05, "loss": 1.512, "step": 21869 }, { "epoch": 0.783211273658388, "grad_norm": 2.339245319366455, "learning_rate": 2.365047689095351e-05, "loss": 1.2251, "step": 21870 }, { "epoch": 0.7832470857879564, "grad_norm": 2.6898417472839355, "learning_rate": 2.3642986605823292e-05, "loss": 1.8021, "step": 21871 }, { "epoch": 0.7832828979175247, "grad_norm": 1.8308742046356201, "learning_rate": 2.3635497347978176e-05, "loss": 1.4888, "step": 21872 }, { "epoch": 0.783318710047093, "grad_norm": 2.016420841217041, "learning_rate": 2.3628009117518956e-05, "loss": 1.4186, "step": 21873 }, { "epoch": 0.7833545221766612, "grad_norm": 1.976356863975525, "learning_rate": 2.3620521914546334e-05, "loss": 1.5075, "step": 21874 }, { "epoch": 0.7833903343062295, "grad_norm": 2.4779212474823, "learning_rate": 2.361303573916107e-05, "loss": 1.5026, "step": 21875 }, { "epoch": 0.7834261464357978, "grad_norm": 1.4665014743804932, "learning_rate": 2.360555059146391e-05, "loss": 1.2472, "step": 21876 }, { "epoch": 0.783461958565366, "grad_norm": 1.507570505142212, "learning_rate": 2.359806647155547e-05, "loss": 1.339, "step": 21877 }, { "epoch": 0.7834977706949344, "grad_norm": 1.5394234657287598, "learning_rate": 2.3590583379536535e-05, "loss": 1.5506, "step": 21878 }, { "epoch": 0.7835335828245027, "grad_norm": 1.5413225889205933, "learning_rate": 2.358310131550773e-05, "loss": 1.5083, "step": 21879 }, { "epoch": 0.783569394954071, "grad_norm": 1.6394191980361938, "learning_rate": 2.3575620279569743e-05, "loss": 1.3997, "step": 21880 }, { "epoch": 0.7836052070836392, "grad_norm": 1.2522400617599487, "learning_rate": 2.3568140271823147e-05, "loss": 1.5342, "step": 21881 }, { "epoch": 0.7836410192132075, "grad_norm": 1.663016676902771, "learning_rate": 2.3560661292368702e-05, "loss": 1.2195, "step": 21882 }, { "epoch": 0.7836768313427758, "grad_norm": 1.80072820186615, "learning_rate": 2.355318334130695e-05, "loss": 1.3811, "step": 21883 }, { "epoch": 0.783712643472344, "grad_norm": 1.770574688911438, "learning_rate": 2.3545706418738476e-05, "loss": 1.476, "step": 21884 }, { "epoch": 0.7837484556019124, "grad_norm": 1.4288097620010376, "learning_rate": 2.3538230524763914e-05, "loss": 1.4681, "step": 21885 }, { "epoch": 0.7837842677314807, "grad_norm": 1.5454676151275635, "learning_rate": 2.353075565948383e-05, "loss": 1.1621, "step": 21886 }, { "epoch": 0.783820079861049, "grad_norm": 1.748083472251892, "learning_rate": 2.352328182299881e-05, "loss": 1.2265, "step": 21887 }, { "epoch": 0.7838558919906172, "grad_norm": 1.7457653284072876, "learning_rate": 2.351580901540933e-05, "loss": 1.3389, "step": 21888 }, { "epoch": 0.7838917041201855, "grad_norm": 1.464296817779541, "learning_rate": 2.3508337236816047e-05, "loss": 1.4847, "step": 21889 }, { "epoch": 0.7839275162497538, "grad_norm": 4.318163871765137, "learning_rate": 2.3500866487319384e-05, "loss": 1.2113, "step": 21890 }, { "epoch": 0.783963328379322, "grad_norm": 2.506546974182129, "learning_rate": 2.3493396767019915e-05, "loss": 1.8466, "step": 21891 }, { "epoch": 0.7839991405088904, "grad_norm": 1.6039185523986816, "learning_rate": 2.348592807601808e-05, "loss": 1.4501, "step": 21892 }, { "epoch": 0.7840349526384587, "grad_norm": 1.9799975156784058, "learning_rate": 2.3478460414414382e-05, "loss": 1.2276, "step": 21893 }, { "epoch": 0.784070764768027, "grad_norm": 1.671531319618225, "learning_rate": 2.3470993782309324e-05, "loss": 1.3684, "step": 21894 }, { "epoch": 0.7841065768975952, "grad_norm": 1.4565225839614868, "learning_rate": 2.3463528179803305e-05, "loss": 1.2772, "step": 21895 }, { "epoch": 0.7841423890271635, "grad_norm": 1.440674901008606, "learning_rate": 2.3456063606996783e-05, "loss": 1.4639, "step": 21896 }, { "epoch": 0.7841782011567318, "grad_norm": 3.0526788234710693, "learning_rate": 2.34486000639902e-05, "loss": 1.2457, "step": 21897 }, { "epoch": 0.7842140132863, "grad_norm": 1.7137478590011597, "learning_rate": 2.3441137550883974e-05, "loss": 1.3624, "step": 21898 }, { "epoch": 0.7842498254158684, "grad_norm": 1.5892608165740967, "learning_rate": 2.3433676067778465e-05, "loss": 1.6998, "step": 21899 }, { "epoch": 0.7842856375454367, "grad_norm": 1.6187303066253662, "learning_rate": 2.3426215614774094e-05, "loss": 1.4802, "step": 21900 }, { "epoch": 0.784321449675005, "grad_norm": 1.3965293169021606, "learning_rate": 2.3418756191971235e-05, "loss": 1.6476, "step": 21901 }, { "epoch": 0.7843572618045732, "grad_norm": 1.4413694143295288, "learning_rate": 2.34112977994702e-05, "loss": 1.5815, "step": 21902 }, { "epoch": 0.7843930739341415, "grad_norm": 2.764780282974243, "learning_rate": 2.340384043737136e-05, "loss": 1.3733, "step": 21903 }, { "epoch": 0.7844288860637098, "grad_norm": 1.5479607582092285, "learning_rate": 2.339638410577505e-05, "loss": 1.4778, "step": 21904 }, { "epoch": 0.784464698193278, "grad_norm": 1.5237574577331543, "learning_rate": 2.3388928804781608e-05, "loss": 1.1754, "step": 21905 }, { "epoch": 0.7845005103228464, "grad_norm": 2.09736704826355, "learning_rate": 2.3381474534491276e-05, "loss": 1.315, "step": 21906 }, { "epoch": 0.7845363224524147, "grad_norm": 1.7399797439575195, "learning_rate": 2.337402129500438e-05, "loss": 1.087, "step": 21907 }, { "epoch": 0.7845721345819829, "grad_norm": 2.0457167625427246, "learning_rate": 2.3366569086421175e-05, "loss": 1.4334, "step": 21908 }, { "epoch": 0.7846079467115512, "grad_norm": 1.504894733428955, "learning_rate": 2.3359117908841966e-05, "loss": 1.5605, "step": 21909 }, { "epoch": 0.7846437588411195, "grad_norm": 1.2692909240722656, "learning_rate": 2.3351667762366948e-05, "loss": 1.4162, "step": 21910 }, { "epoch": 0.7846795709706877, "grad_norm": 1.8870021104812622, "learning_rate": 2.334421864709636e-05, "loss": 1.6566, "step": 21911 }, { "epoch": 0.784715383100256, "grad_norm": 2.1324985027313232, "learning_rate": 2.3336770563130463e-05, "loss": 1.5475, "step": 21912 }, { "epoch": 0.7847511952298244, "grad_norm": 2.145512580871582, "learning_rate": 2.33293235105694e-05, "loss": 1.5966, "step": 21913 }, { "epoch": 0.7847870073593927, "grad_norm": 1.569747805595398, "learning_rate": 2.332187748951339e-05, "loss": 1.2442, "step": 21914 }, { "epoch": 0.7848228194889609, "grad_norm": 1.6398723125457764, "learning_rate": 2.331443250006261e-05, "loss": 1.4996, "step": 21915 }, { "epoch": 0.7848586316185292, "grad_norm": 1.8749650716781616, "learning_rate": 2.3306988542317255e-05, "loss": 1.2954, "step": 21916 }, { "epoch": 0.7848944437480975, "grad_norm": 1.3666261434555054, "learning_rate": 2.3299545616377415e-05, "loss": 1.1951, "step": 21917 }, { "epoch": 0.7849302558776657, "grad_norm": 1.1874198913574219, "learning_rate": 2.329210372234325e-05, "loss": 1.4692, "step": 21918 }, { "epoch": 0.784966068007234, "grad_norm": 1.760377049446106, "learning_rate": 2.3284662860314922e-05, "loss": 1.1574, "step": 21919 }, { "epoch": 0.7850018801368024, "grad_norm": 1.4190868139266968, "learning_rate": 2.3277223030392458e-05, "loss": 1.2522, "step": 21920 }, { "epoch": 0.7850376922663707, "grad_norm": 1.8131829500198364, "learning_rate": 2.3269784232675995e-05, "loss": 1.4429, "step": 21921 }, { "epoch": 0.7850735043959389, "grad_norm": 1.7088327407836914, "learning_rate": 2.3262346467265605e-05, "loss": 1.3675, "step": 21922 }, { "epoch": 0.7851093165255072, "grad_norm": 1.543448567390442, "learning_rate": 2.3254909734261398e-05, "loss": 1.5017, "step": 21923 }, { "epoch": 0.7851451286550755, "grad_norm": 2.0293030738830566, "learning_rate": 2.324747403376336e-05, "loss": 1.5802, "step": 21924 }, { "epoch": 0.7851809407846437, "grad_norm": 1.8709771633148193, "learning_rate": 2.3240039365871546e-05, "loss": 1.7063, "step": 21925 }, { "epoch": 0.785216752914212, "grad_norm": 1.7643381357192993, "learning_rate": 2.3232605730686018e-05, "loss": 1.4052, "step": 21926 }, { "epoch": 0.7852525650437804, "grad_norm": 1.706915020942688, "learning_rate": 2.3225173128306733e-05, "loss": 1.2809, "step": 21927 }, { "epoch": 0.7852883771733487, "grad_norm": 1.7989875078201294, "learning_rate": 2.3217741558833706e-05, "loss": 1.5696, "step": 21928 }, { "epoch": 0.7853241893029169, "grad_norm": 1.6285196542739868, "learning_rate": 2.321031102236694e-05, "loss": 1.1875, "step": 21929 }, { "epoch": 0.7853600014324852, "grad_norm": 1.5075069665908813, "learning_rate": 2.3202881519006393e-05, "loss": 1.3279, "step": 21930 }, { "epoch": 0.7853958135620535, "grad_norm": 1.7992472648620605, "learning_rate": 2.3195453048852e-05, "loss": 1.5811, "step": 21931 }, { "epoch": 0.7854316256916217, "grad_norm": 1.7869278192520142, "learning_rate": 2.3188025612003718e-05, "loss": 1.7683, "step": 21932 }, { "epoch": 0.78546743782119, "grad_norm": 1.4537584781646729, "learning_rate": 2.318059920856146e-05, "loss": 1.3219, "step": 21933 }, { "epoch": 0.7855032499507584, "grad_norm": 1.5661792755126953, "learning_rate": 2.3173173838625183e-05, "loss": 1.4321, "step": 21934 }, { "epoch": 0.7855390620803266, "grad_norm": 1.5315093994140625, "learning_rate": 2.316574950229472e-05, "loss": 1.5277, "step": 21935 }, { "epoch": 0.7855748742098949, "grad_norm": 2.292695999145508, "learning_rate": 2.315832619967e-05, "loss": 1.0601, "step": 21936 }, { "epoch": 0.7856106863394632, "grad_norm": 1.7151291370391846, "learning_rate": 2.3150903930850896e-05, "loss": 1.4839, "step": 21937 }, { "epoch": 0.7856464984690315, "grad_norm": 1.6335084438323975, "learning_rate": 2.3143482695937235e-05, "loss": 1.2555, "step": 21938 }, { "epoch": 0.7856823105985997, "grad_norm": 1.5263906717300415, "learning_rate": 2.3136062495028876e-05, "loss": 1.4354, "step": 21939 }, { "epoch": 0.785718122728168, "grad_norm": 1.3875404596328735, "learning_rate": 2.312864332822564e-05, "loss": 1.4227, "step": 21940 }, { "epoch": 0.7857539348577364, "grad_norm": 2.1404190063476562, "learning_rate": 2.3121225195627382e-05, "loss": 1.6252, "step": 21941 }, { "epoch": 0.7857897469873046, "grad_norm": 1.6052498817443848, "learning_rate": 2.3113808097333854e-05, "loss": 1.5175, "step": 21942 }, { "epoch": 0.7858255591168729, "grad_norm": 1.234230637550354, "learning_rate": 2.3106392033444856e-05, "loss": 1.189, "step": 21943 }, { "epoch": 0.7858613712464412, "grad_norm": 1.5555446147918701, "learning_rate": 2.3098977004060185e-05, "loss": 1.3555, "step": 21944 }, { "epoch": 0.7858971833760094, "grad_norm": 1.4354848861694336, "learning_rate": 2.309156300927957e-05, "loss": 1.4896, "step": 21945 }, { "epoch": 0.7859329955055777, "grad_norm": 1.6179566383361816, "learning_rate": 2.308415004920277e-05, "loss": 1.3944, "step": 21946 }, { "epoch": 0.785968807635146, "grad_norm": 2.2030816078186035, "learning_rate": 2.307673812392951e-05, "loss": 1.605, "step": 21947 }, { "epoch": 0.7860046197647144, "grad_norm": 1.3560844659805298, "learning_rate": 2.3069327233559533e-05, "loss": 1.143, "step": 21948 }, { "epoch": 0.7860404318942826, "grad_norm": 1.650636076927185, "learning_rate": 2.306191737819251e-05, "loss": 1.3462, "step": 21949 }, { "epoch": 0.7860762440238509, "grad_norm": 1.7846274375915527, "learning_rate": 2.3054508557928144e-05, "loss": 1.6821, "step": 21950 }, { "epoch": 0.7861120561534192, "grad_norm": 1.6764312982559204, "learning_rate": 2.3047100772866114e-05, "loss": 1.4222, "step": 21951 }, { "epoch": 0.7861478682829874, "grad_norm": 2.348435878753662, "learning_rate": 2.3039694023106106e-05, "loss": 1.341, "step": 21952 }, { "epoch": 0.7861836804125557, "grad_norm": 1.734067678451538, "learning_rate": 2.303228830874775e-05, "loss": 1.3237, "step": 21953 }, { "epoch": 0.786219492542124, "grad_norm": 2.134556770324707, "learning_rate": 2.3024883629890604e-05, "loss": 1.5639, "step": 21954 }, { "epoch": 0.7862553046716924, "grad_norm": 1.831900954246521, "learning_rate": 2.3017479986634426e-05, "loss": 1.4596, "step": 21955 }, { "epoch": 0.7862911168012606, "grad_norm": 2.242368698120117, "learning_rate": 2.3010077379078722e-05, "loss": 1.2863, "step": 21956 }, { "epoch": 0.7863269289308289, "grad_norm": 1.671486496925354, "learning_rate": 2.300267580732315e-05, "loss": 1.3689, "step": 21957 }, { "epoch": 0.7863627410603972, "grad_norm": 1.524246335029602, "learning_rate": 2.2995275271467187e-05, "loss": 1.4023, "step": 21958 }, { "epoch": 0.7863985531899654, "grad_norm": 2.152644157409668, "learning_rate": 2.2987875771610534e-05, "loss": 1.3216, "step": 21959 }, { "epoch": 0.7864343653195337, "grad_norm": 1.521066427230835, "learning_rate": 2.2980477307852642e-05, "loss": 1.1968, "step": 21960 }, { "epoch": 0.786470177449102, "grad_norm": 1.6909211874008179, "learning_rate": 2.297307988029308e-05, "loss": 1.565, "step": 21961 }, { "epoch": 0.7865059895786704, "grad_norm": 1.748152732849121, "learning_rate": 2.29656834890314e-05, "loss": 1.3081, "step": 21962 }, { "epoch": 0.7865418017082386, "grad_norm": 1.6962015628814697, "learning_rate": 2.2958288134167048e-05, "loss": 1.2831, "step": 21963 }, { "epoch": 0.7865776138378069, "grad_norm": 1.4486021995544434, "learning_rate": 2.295089381579959e-05, "loss": 1.4332, "step": 21964 }, { "epoch": 0.7866134259673752, "grad_norm": 1.4903663396835327, "learning_rate": 2.2943500534028406e-05, "loss": 1.4113, "step": 21965 }, { "epoch": 0.7866492380969434, "grad_norm": 1.5759414434432983, "learning_rate": 2.2936108288953083e-05, "loss": 1.2901, "step": 21966 }, { "epoch": 0.7866850502265117, "grad_norm": 1.741335153579712, "learning_rate": 2.292871708067299e-05, "loss": 1.257, "step": 21967 }, { "epoch": 0.78672086235608, "grad_norm": 2.2893245220184326, "learning_rate": 2.2921326909287634e-05, "loss": 1.5896, "step": 21968 }, { "epoch": 0.7867566744856483, "grad_norm": 1.9488850831985474, "learning_rate": 2.291393777489632e-05, "loss": 1.4638, "step": 21969 }, { "epoch": 0.7867924866152166, "grad_norm": 1.3496569395065308, "learning_rate": 2.290654967759862e-05, "loss": 1.6248, "step": 21970 }, { "epoch": 0.7868282987447849, "grad_norm": 2.044699192047119, "learning_rate": 2.289916261749383e-05, "loss": 1.306, "step": 21971 }, { "epoch": 0.7868641108743532, "grad_norm": 1.3044853210449219, "learning_rate": 2.2891776594681315e-05, "loss": 1.4078, "step": 21972 }, { "epoch": 0.7868999230039214, "grad_norm": 1.7480604648590088, "learning_rate": 2.2884391609260525e-05, "loss": 1.2894, "step": 21973 }, { "epoch": 0.7869357351334897, "grad_norm": 1.638363242149353, "learning_rate": 2.2877007661330762e-05, "loss": 1.638, "step": 21974 }, { "epoch": 0.786971547263058, "grad_norm": 1.6407605409622192, "learning_rate": 2.2869624750991393e-05, "loss": 1.3685, "step": 21975 }, { "epoch": 0.7870073593926263, "grad_norm": 1.5192142724990845, "learning_rate": 2.2862242878341678e-05, "loss": 1.8149, "step": 21976 }, { "epoch": 0.7870431715221946, "grad_norm": 1.6428052186965942, "learning_rate": 2.285486204348105e-05, "loss": 1.4984, "step": 21977 }, { "epoch": 0.7870789836517629, "grad_norm": 1.9950889348983765, "learning_rate": 2.284748224650871e-05, "loss": 1.5231, "step": 21978 }, { "epoch": 0.7871147957813311, "grad_norm": 2.0278079509735107, "learning_rate": 2.2840103487524e-05, "loss": 1.3566, "step": 21979 }, { "epoch": 0.7871506079108994, "grad_norm": 1.671902060508728, "learning_rate": 2.283272576662615e-05, "loss": 1.5062, "step": 21980 }, { "epoch": 0.7871864200404677, "grad_norm": 1.5835262537002563, "learning_rate": 2.2825349083914426e-05, "loss": 1.4686, "step": 21981 }, { "epoch": 0.787222232170036, "grad_norm": 1.7632768154144287, "learning_rate": 2.2817973439488117e-05, "loss": 1.3356, "step": 21982 }, { "epoch": 0.7872580442996042, "grad_norm": 1.718269944190979, "learning_rate": 2.2810598833446382e-05, "loss": 1.306, "step": 21983 }, { "epoch": 0.7872938564291726, "grad_norm": 1.9307987689971924, "learning_rate": 2.2803225265888484e-05, "loss": 1.5905, "step": 21984 }, { "epoch": 0.7873296685587409, "grad_norm": 1.7241172790527344, "learning_rate": 2.2795852736913604e-05, "loss": 1.4503, "step": 21985 }, { "epoch": 0.7873654806883091, "grad_norm": 1.7079567909240723, "learning_rate": 2.2788481246620973e-05, "loss": 1.2879, "step": 21986 }, { "epoch": 0.7874012928178774, "grad_norm": 1.3769007921218872, "learning_rate": 2.2781110795109674e-05, "loss": 1.525, "step": 21987 }, { "epoch": 0.7874371049474457, "grad_norm": 1.4252955913543701, "learning_rate": 2.2773741382478975e-05, "loss": 1.5648, "step": 21988 }, { "epoch": 0.787472917077014, "grad_norm": 1.4553250074386597, "learning_rate": 2.276637300882797e-05, "loss": 1.5559, "step": 21989 }, { "epoch": 0.7875087292065822, "grad_norm": 1.6732171773910522, "learning_rate": 2.2759005674255774e-05, "loss": 1.2551, "step": 21990 }, { "epoch": 0.7875445413361506, "grad_norm": 1.594787836074829, "learning_rate": 2.275163937886151e-05, "loss": 1.4659, "step": 21991 }, { "epoch": 0.7875803534657189, "grad_norm": 1.33848237991333, "learning_rate": 2.2744274122744304e-05, "loss": 1.2285, "step": 21992 }, { "epoch": 0.7876161655952871, "grad_norm": 1.8487385511398315, "learning_rate": 2.2736909906003266e-05, "loss": 1.879, "step": 21993 }, { "epoch": 0.7876519777248554, "grad_norm": 1.378639817237854, "learning_rate": 2.2729546728737416e-05, "loss": 1.6364, "step": 21994 }, { "epoch": 0.7876877898544237, "grad_norm": 1.6893432140350342, "learning_rate": 2.2722184591045835e-05, "loss": 1.5968, "step": 21995 }, { "epoch": 0.7877236019839919, "grad_norm": 1.7341252565383911, "learning_rate": 2.2714823493027583e-05, "loss": 1.5264, "step": 21996 }, { "epoch": 0.7877594141135602, "grad_norm": 1.3586167097091675, "learning_rate": 2.2707463434781718e-05, "loss": 1.3724, "step": 21997 }, { "epoch": 0.7877952262431286, "grad_norm": 1.5334831476211548, "learning_rate": 2.2700104416407208e-05, "loss": 1.736, "step": 21998 }, { "epoch": 0.7878310383726969, "grad_norm": 1.5055005550384521, "learning_rate": 2.2692746438003078e-05, "loss": 1.2053, "step": 21999 }, { "epoch": 0.7878668505022651, "grad_norm": 2.348088502883911, "learning_rate": 2.2685389499668352e-05, "loss": 1.5909, "step": 22000 }, { "epoch": 0.7879026626318334, "grad_norm": 1.4939537048339844, "learning_rate": 2.2678033601501957e-05, "loss": 1.5303, "step": 22001 }, { "epoch": 0.7879384747614017, "grad_norm": 1.1772562265396118, "learning_rate": 2.2670678743602892e-05, "loss": 1.2715, "step": 22002 }, { "epoch": 0.7879742868909699, "grad_norm": 1.5099385976791382, "learning_rate": 2.2663324926070086e-05, "loss": 1.3447, "step": 22003 }, { "epoch": 0.7880100990205382, "grad_norm": 1.5559260845184326, "learning_rate": 2.2655972149002512e-05, "loss": 1.1992, "step": 22004 }, { "epoch": 0.7880459111501066, "grad_norm": 2.2137198448181152, "learning_rate": 2.2648620412499045e-05, "loss": 1.7298, "step": 22005 }, { "epoch": 0.7880817232796749, "grad_norm": 1.509599208831787, "learning_rate": 2.264126971665861e-05, "loss": 1.1957, "step": 22006 }, { "epoch": 0.7881175354092431, "grad_norm": 1.8853390216827393, "learning_rate": 2.2633920061580127e-05, "loss": 1.7451, "step": 22007 }, { "epoch": 0.7881533475388114, "grad_norm": 1.300235629081726, "learning_rate": 2.262657144736243e-05, "loss": 1.2197, "step": 22008 }, { "epoch": 0.7881891596683797, "grad_norm": 1.5796751976013184, "learning_rate": 2.2619223874104423e-05, "loss": 1.5641, "step": 22009 }, { "epoch": 0.7882249717979479, "grad_norm": 1.7323791980743408, "learning_rate": 2.261187734190493e-05, "loss": 1.5017, "step": 22010 }, { "epoch": 0.7882607839275162, "grad_norm": 1.4595657587051392, "learning_rate": 2.2604531850862832e-05, "loss": 1.5183, "step": 22011 }, { "epoch": 0.7882965960570846, "grad_norm": 2.023123264312744, "learning_rate": 2.2597187401076903e-05, "loss": 1.6822, "step": 22012 }, { "epoch": 0.7883324081866528, "grad_norm": 1.8436816930770874, "learning_rate": 2.2589843992645977e-05, "loss": 1.6796, "step": 22013 }, { "epoch": 0.7883682203162211, "grad_norm": 1.3676259517669678, "learning_rate": 2.258250162566887e-05, "loss": 1.4, "step": 22014 }, { "epoch": 0.7884040324457894, "grad_norm": 1.615670084953308, "learning_rate": 2.2575160300244314e-05, "loss": 1.2904, "step": 22015 }, { "epoch": 0.7884398445753577, "grad_norm": 1.8132332563400269, "learning_rate": 2.2567820016471107e-05, "loss": 1.19, "step": 22016 }, { "epoch": 0.7884756567049259, "grad_norm": 1.5410048961639404, "learning_rate": 2.256048077444801e-05, "loss": 1.4224, "step": 22017 }, { "epoch": 0.7885114688344942, "grad_norm": 1.866053581237793, "learning_rate": 2.2553142574273777e-05, "loss": 1.4266, "step": 22018 }, { "epoch": 0.7885472809640626, "grad_norm": 1.684851884841919, "learning_rate": 2.2545805416047073e-05, "loss": 1.4562, "step": 22019 }, { "epoch": 0.7885830930936308, "grad_norm": 1.7239848375320435, "learning_rate": 2.253846929986666e-05, "loss": 1.2378, "step": 22020 }, { "epoch": 0.7886189052231991, "grad_norm": 2.0426554679870605, "learning_rate": 2.253113422583122e-05, "loss": 1.1809, "step": 22021 }, { "epoch": 0.7886547173527674, "grad_norm": 1.4321867227554321, "learning_rate": 2.252380019403947e-05, "loss": 1.1409, "step": 22022 }, { "epoch": 0.7886905294823356, "grad_norm": 1.3526005744934082, "learning_rate": 2.251646720459003e-05, "loss": 1.1834, "step": 22023 }, { "epoch": 0.7887263416119039, "grad_norm": 1.6505836248397827, "learning_rate": 2.250913525758157e-05, "loss": 1.3677, "step": 22024 }, { "epoch": 0.7887621537414722, "grad_norm": 1.5732851028442383, "learning_rate": 2.2501804353112765e-05, "loss": 1.6171, "step": 22025 }, { "epoch": 0.7887979658710406, "grad_norm": 1.22661554813385, "learning_rate": 2.249447449128219e-05, "loss": 1.4751, "step": 22026 }, { "epoch": 0.7888337780006088, "grad_norm": 1.3099799156188965, "learning_rate": 2.248714567218849e-05, "loss": 1.1526, "step": 22027 }, { "epoch": 0.7888695901301771, "grad_norm": 2.0355942249298096, "learning_rate": 2.2479817895930256e-05, "loss": 1.4029, "step": 22028 }, { "epoch": 0.7889054022597454, "grad_norm": 1.6288301944732666, "learning_rate": 2.247249116260611e-05, "loss": 1.5374, "step": 22029 }, { "epoch": 0.7889412143893136, "grad_norm": 1.6688481569290161, "learning_rate": 2.2465165472314564e-05, "loss": 1.449, "step": 22030 }, { "epoch": 0.7889770265188819, "grad_norm": 1.6263892650604248, "learning_rate": 2.2457840825154198e-05, "loss": 1.3263, "step": 22031 }, { "epoch": 0.7890128386484502, "grad_norm": 1.6034423112869263, "learning_rate": 2.24505172212236e-05, "loss": 1.4507, "step": 22032 }, { "epoch": 0.7890486507780186, "grad_norm": 2.850820541381836, "learning_rate": 2.2443194660621225e-05, "loss": 1.6127, "step": 22033 }, { "epoch": 0.7890844629075868, "grad_norm": 1.3363112211227417, "learning_rate": 2.243587314344563e-05, "loss": 1.3583, "step": 22034 }, { "epoch": 0.7891202750371551, "grad_norm": 1.8845534324645996, "learning_rate": 2.242855266979531e-05, "loss": 1.353, "step": 22035 }, { "epoch": 0.7891560871667234, "grad_norm": 1.926565170288086, "learning_rate": 2.242123323976878e-05, "loss": 1.6149, "step": 22036 }, { "epoch": 0.7891918992962916, "grad_norm": 1.9906445741653442, "learning_rate": 2.2413914853464455e-05, "loss": 1.466, "step": 22037 }, { "epoch": 0.7892277114258599, "grad_norm": 1.8436861038208008, "learning_rate": 2.240659751098083e-05, "loss": 1.4323, "step": 22038 }, { "epoch": 0.7892635235554282, "grad_norm": 1.6157275438308716, "learning_rate": 2.2399281212416346e-05, "loss": 1.3719, "step": 22039 }, { "epoch": 0.7892993356849966, "grad_norm": 1.502375841140747, "learning_rate": 2.2391965957869464e-05, "loss": 1.5709, "step": 22040 }, { "epoch": 0.7893351478145648, "grad_norm": 3.7131903171539307, "learning_rate": 2.2384651747438578e-05, "loss": 1.413, "step": 22041 }, { "epoch": 0.7893709599441331, "grad_norm": 1.402735948562622, "learning_rate": 2.237733858122203e-05, "loss": 1.5297, "step": 22042 }, { "epoch": 0.7894067720737014, "grad_norm": 1.9113142490386963, "learning_rate": 2.2370026459318315e-05, "loss": 1.4642, "step": 22043 }, { "epoch": 0.7894425842032696, "grad_norm": 2.179842710494995, "learning_rate": 2.236271538182574e-05, "loss": 1.5229, "step": 22044 }, { "epoch": 0.7894783963328379, "grad_norm": 1.7188292741775513, "learning_rate": 2.2355405348842672e-05, "loss": 1.3425, "step": 22045 }, { "epoch": 0.7895142084624062, "grad_norm": 2.006430149078369, "learning_rate": 2.2348096360467484e-05, "loss": 1.4632, "step": 22046 }, { "epoch": 0.7895500205919745, "grad_norm": 1.5962008237838745, "learning_rate": 2.2340788416798518e-05, "loss": 1.4263, "step": 22047 }, { "epoch": 0.7895858327215428, "grad_norm": 1.4921599626541138, "learning_rate": 2.233348151793404e-05, "loss": 1.2119, "step": 22048 }, { "epoch": 0.7896216448511111, "grad_norm": 1.3751320838928223, "learning_rate": 2.232617566397238e-05, "loss": 1.2617, "step": 22049 }, { "epoch": 0.7896574569806794, "grad_norm": 1.339401364326477, "learning_rate": 2.2318870855011874e-05, "loss": 1.5102, "step": 22050 }, { "epoch": 0.7896932691102476, "grad_norm": 1.2918152809143066, "learning_rate": 2.231156709115073e-05, "loss": 1.358, "step": 22051 }, { "epoch": 0.7897290812398159, "grad_norm": 1.8511288166046143, "learning_rate": 2.230426437248726e-05, "loss": 1.1148, "step": 22052 }, { "epoch": 0.7897648933693842, "grad_norm": 1.64247727394104, "learning_rate": 2.229696269911965e-05, "loss": 1.1974, "step": 22053 }, { "epoch": 0.7898007054989525, "grad_norm": 1.3478108644485474, "learning_rate": 2.228966207114622e-05, "loss": 1.6666, "step": 22054 }, { "epoch": 0.7898365176285208, "grad_norm": 1.3576805591583252, "learning_rate": 2.228236248866512e-05, "loss": 1.3979, "step": 22055 }, { "epoch": 0.7898723297580891, "grad_norm": 2.1761605739593506, "learning_rate": 2.2275063951774587e-05, "loss": 1.5848, "step": 22056 }, { "epoch": 0.7899081418876573, "grad_norm": 1.8470492362976074, "learning_rate": 2.2267766460572814e-05, "loss": 1.3471, "step": 22057 }, { "epoch": 0.7899439540172256, "grad_norm": 1.5361154079437256, "learning_rate": 2.226047001515801e-05, "loss": 1.6824, "step": 22058 }, { "epoch": 0.7899797661467939, "grad_norm": 1.5846877098083496, "learning_rate": 2.225317461562829e-05, "loss": 1.3616, "step": 22059 }, { "epoch": 0.7900155782763622, "grad_norm": 1.971751093864441, "learning_rate": 2.2245880262081774e-05, "loss": 1.6053, "step": 22060 }, { "epoch": 0.7900513904059305, "grad_norm": 1.9126818180084229, "learning_rate": 2.223858695461669e-05, "loss": 1.4891, "step": 22061 }, { "epoch": 0.7900872025354988, "grad_norm": 1.4563158750534058, "learning_rate": 2.2231294693331096e-05, "loss": 1.4378, "step": 22062 }, { "epoch": 0.7901230146650671, "grad_norm": 1.2912527322769165, "learning_rate": 2.222400347832314e-05, "loss": 1.4947, "step": 22063 }, { "epoch": 0.7901588267946353, "grad_norm": 1.532296061515808, "learning_rate": 2.221671330969084e-05, "loss": 1.4986, "step": 22064 }, { "epoch": 0.7901946389242036, "grad_norm": 1.5457974672317505, "learning_rate": 2.220942418753238e-05, "loss": 1.4653, "step": 22065 }, { "epoch": 0.7902304510537719, "grad_norm": 1.6285170316696167, "learning_rate": 2.220213611194576e-05, "loss": 1.5869, "step": 22066 }, { "epoch": 0.7902662631833401, "grad_norm": 1.7709981203079224, "learning_rate": 2.2194849083029057e-05, "loss": 1.2938, "step": 22067 }, { "epoch": 0.7903020753129085, "grad_norm": 1.4877737760543823, "learning_rate": 2.2187563100880282e-05, "loss": 1.5076, "step": 22068 }, { "epoch": 0.7903378874424768, "grad_norm": 1.7979744672775269, "learning_rate": 2.2180278165597467e-05, "loss": 1.6471, "step": 22069 }, { "epoch": 0.7903736995720451, "grad_norm": 1.7051743268966675, "learning_rate": 2.2172994277278668e-05, "loss": 1.4707, "step": 22070 }, { "epoch": 0.7904095117016133, "grad_norm": 2.3612325191497803, "learning_rate": 2.2165711436021774e-05, "loss": 1.3351, "step": 22071 }, { "epoch": 0.7904453238311816, "grad_norm": 1.6688679456710815, "learning_rate": 2.2158429641924895e-05, "loss": 1.2814, "step": 22072 }, { "epoch": 0.7904811359607499, "grad_norm": 1.6498329639434814, "learning_rate": 2.2151148895085906e-05, "loss": 1.6461, "step": 22073 }, { "epoch": 0.7905169480903181, "grad_norm": 2.0248377323150635, "learning_rate": 2.2143869195602816e-05, "loss": 1.5359, "step": 22074 }, { "epoch": 0.7905527602198865, "grad_norm": 1.48524010181427, "learning_rate": 2.2136590543573497e-05, "loss": 1.4948, "step": 22075 }, { "epoch": 0.7905885723494548, "grad_norm": 1.3780854940414429, "learning_rate": 2.212931293909596e-05, "loss": 1.2927, "step": 22076 }, { "epoch": 0.7906243844790231, "grad_norm": 1.5393905639648438, "learning_rate": 2.2122036382268074e-05, "loss": 0.9784, "step": 22077 }, { "epoch": 0.7906601966085913, "grad_norm": 1.6730303764343262, "learning_rate": 2.21147608731877e-05, "loss": 1.6592, "step": 22078 }, { "epoch": 0.7906960087381596, "grad_norm": 1.3171392679214478, "learning_rate": 2.210748641195276e-05, "loss": 1.4827, "step": 22079 }, { "epoch": 0.7907318208677279, "grad_norm": 1.4257128238677979, "learning_rate": 2.210021299866112e-05, "loss": 1.3873, "step": 22080 }, { "epoch": 0.7907676329972961, "grad_norm": 1.7365443706512451, "learning_rate": 2.209294063341065e-05, "loss": 1.4258, "step": 22081 }, { "epoch": 0.7908034451268645, "grad_norm": 1.7937761545181274, "learning_rate": 2.2085669316299117e-05, "loss": 1.8, "step": 22082 }, { "epoch": 0.7908392572564328, "grad_norm": 2.080672025680542, "learning_rate": 2.207839904742446e-05, "loss": 1.4081, "step": 22083 }, { "epoch": 0.790875069386001, "grad_norm": 2.0380451679229736, "learning_rate": 2.2071129826884397e-05, "loss": 1.2983, "step": 22084 }, { "epoch": 0.7909108815155693, "grad_norm": 1.4107887744903564, "learning_rate": 2.2063861654776798e-05, "loss": 1.4714, "step": 22085 }, { "epoch": 0.7909466936451376, "grad_norm": 1.3979287147521973, "learning_rate": 2.205659453119938e-05, "loss": 1.2243, "step": 22086 }, { "epoch": 0.7909825057747059, "grad_norm": 1.476107120513916, "learning_rate": 2.204932845624994e-05, "loss": 1.5036, "step": 22087 }, { "epoch": 0.7910183179042741, "grad_norm": 1.4876946210861206, "learning_rate": 2.204206343002626e-05, "loss": 1.4428, "step": 22088 }, { "epoch": 0.7910541300338425, "grad_norm": 1.7648842334747314, "learning_rate": 2.2034799452626043e-05, "loss": 1.5549, "step": 22089 }, { "epoch": 0.7910899421634108, "grad_norm": 1.5976637601852417, "learning_rate": 2.2027536524147017e-05, "loss": 1.6226, "step": 22090 }, { "epoch": 0.791125754292979, "grad_norm": 1.9662413597106934, "learning_rate": 2.2020274644686922e-05, "loss": 1.2422, "step": 22091 }, { "epoch": 0.7911615664225473, "grad_norm": 1.4056212902069092, "learning_rate": 2.2013013814343465e-05, "loss": 1.2724, "step": 22092 }, { "epoch": 0.7911973785521156, "grad_norm": 1.9056984186172485, "learning_rate": 2.200575403321429e-05, "loss": 1.7059, "step": 22093 }, { "epoch": 0.7912331906816839, "grad_norm": 1.6030707359313965, "learning_rate": 2.1998495301397083e-05, "loss": 1.666, "step": 22094 }, { "epoch": 0.7912690028112521, "grad_norm": 1.5489169359207153, "learning_rate": 2.1991237618989535e-05, "loss": 1.2683, "step": 22095 }, { "epoch": 0.7913048149408205, "grad_norm": 1.811343789100647, "learning_rate": 2.1983980986089235e-05, "loss": 1.1581, "step": 22096 }, { "epoch": 0.7913406270703888, "grad_norm": 1.4421796798706055, "learning_rate": 2.197672540279384e-05, "loss": 1.3113, "step": 22097 }, { "epoch": 0.791376439199957, "grad_norm": 2.07533860206604, "learning_rate": 2.196947086920096e-05, "loss": 1.4348, "step": 22098 }, { "epoch": 0.7914122513295253, "grad_norm": 1.6728070974349976, "learning_rate": 2.1962217385408225e-05, "loss": 1.508, "step": 22099 }, { "epoch": 0.7914480634590936, "grad_norm": 1.8283659219741821, "learning_rate": 2.1954964951513168e-05, "loss": 1.5883, "step": 22100 }, { "epoch": 0.7914838755886618, "grad_norm": 1.3747072219848633, "learning_rate": 2.1947713567613382e-05, "loss": 1.1613, "step": 22101 }, { "epoch": 0.7915196877182301, "grad_norm": 1.5672385692596436, "learning_rate": 2.194046323380643e-05, "loss": 1.4233, "step": 22102 }, { "epoch": 0.7915554998477985, "grad_norm": 1.5005253553390503, "learning_rate": 2.193321395018989e-05, "loss": 1.303, "step": 22103 }, { "epoch": 0.7915913119773668, "grad_norm": 1.7410175800323486, "learning_rate": 2.192596571686123e-05, "loss": 1.4531, "step": 22104 }, { "epoch": 0.791627124106935, "grad_norm": 3.041870594024658, "learning_rate": 2.1918718533917982e-05, "loss": 1.7464, "step": 22105 }, { "epoch": 0.7916629362365033, "grad_norm": 1.4313814640045166, "learning_rate": 2.1911472401457688e-05, "loss": 1.426, "step": 22106 }, { "epoch": 0.7916987483660716, "grad_norm": 2.0393707752227783, "learning_rate": 2.1904227319577786e-05, "loss": 1.4193, "step": 22107 }, { "epoch": 0.7917345604956398, "grad_norm": 1.6360301971435547, "learning_rate": 2.1896983288375773e-05, "loss": 1.0499, "step": 22108 }, { "epoch": 0.7917703726252081, "grad_norm": 2.0467658042907715, "learning_rate": 2.188974030794909e-05, "loss": 1.6207, "step": 22109 }, { "epoch": 0.7918061847547765, "grad_norm": 1.2687796354293823, "learning_rate": 2.1882498378395232e-05, "loss": 1.5234, "step": 22110 }, { "epoch": 0.7918419968843448, "grad_norm": 1.6729602813720703, "learning_rate": 2.1875257499811563e-05, "loss": 1.2955, "step": 22111 }, { "epoch": 0.791877809013913, "grad_norm": 1.8214075565338135, "learning_rate": 2.1868017672295537e-05, "loss": 1.5294, "step": 22112 }, { "epoch": 0.7919136211434813, "grad_norm": 1.2164210081100464, "learning_rate": 2.1860778895944566e-05, "loss": 1.3928, "step": 22113 }, { "epoch": 0.7919494332730496, "grad_norm": 1.5118670463562012, "learning_rate": 2.1853541170856007e-05, "loss": 1.39, "step": 22114 }, { "epoch": 0.7919852454026178, "grad_norm": 1.5947706699371338, "learning_rate": 2.1846304497127247e-05, "loss": 1.4102, "step": 22115 }, { "epoch": 0.7920210575321861, "grad_norm": 1.5643930435180664, "learning_rate": 2.183906887485565e-05, "loss": 1.5654, "step": 22116 }, { "epoch": 0.7920568696617545, "grad_norm": 1.5630208253860474, "learning_rate": 2.1831834304138587e-05, "loss": 1.5264, "step": 22117 }, { "epoch": 0.7920926817913228, "grad_norm": 2.1537749767303467, "learning_rate": 2.1824600785073335e-05, "loss": 1.46, "step": 22118 }, { "epoch": 0.792128493920891, "grad_norm": 1.5308321714401245, "learning_rate": 2.1817368317757235e-05, "loss": 1.3042, "step": 22119 }, { "epoch": 0.7921643060504593, "grad_norm": 1.5003904104232788, "learning_rate": 2.1810136902287625e-05, "loss": 0.9253, "step": 22120 }, { "epoch": 0.7922001181800276, "grad_norm": 1.9857511520385742, "learning_rate": 2.1802906538761748e-05, "loss": 1.3661, "step": 22121 }, { "epoch": 0.7922359303095958, "grad_norm": 2.4356906414031982, "learning_rate": 2.179567722727689e-05, "loss": 1.4112, "step": 22122 }, { "epoch": 0.7922717424391641, "grad_norm": 1.3355191946029663, "learning_rate": 2.178844896793032e-05, "loss": 1.0901, "step": 22123 }, { "epoch": 0.7923075545687325, "grad_norm": 1.3789863586425781, "learning_rate": 2.1781221760819303e-05, "loss": 1.6212, "step": 22124 }, { "epoch": 0.7923433666983007, "grad_norm": 1.8660190105438232, "learning_rate": 2.1773995606041044e-05, "loss": 1.2014, "step": 22125 }, { "epoch": 0.792379178827869, "grad_norm": 1.63323175907135, "learning_rate": 2.1766770503692748e-05, "loss": 1.5686, "step": 22126 }, { "epoch": 0.7924149909574373, "grad_norm": 1.8331819772720337, "learning_rate": 2.1759546453871647e-05, "loss": 1.3657, "step": 22127 }, { "epoch": 0.7924508030870056, "grad_norm": 1.5052270889282227, "learning_rate": 2.1752323456674962e-05, "loss": 1.0748, "step": 22128 }, { "epoch": 0.7924866152165738, "grad_norm": 1.5629814863204956, "learning_rate": 2.1745101512199806e-05, "loss": 1.4885, "step": 22129 }, { "epoch": 0.7925224273461421, "grad_norm": 1.492061734199524, "learning_rate": 2.173788062054336e-05, "loss": 1.3173, "step": 22130 }, { "epoch": 0.7925582394757105, "grad_norm": 1.879479169845581, "learning_rate": 2.1730660781802804e-05, "loss": 1.4771, "step": 22131 }, { "epoch": 0.7925940516052787, "grad_norm": 1.4208660125732422, "learning_rate": 2.1723441996075223e-05, "loss": 1.3431, "step": 22132 }, { "epoch": 0.792629863734847, "grad_norm": 1.137665867805481, "learning_rate": 2.1716224263457763e-05, "loss": 1.3964, "step": 22133 }, { "epoch": 0.7926656758644153, "grad_norm": 1.7951220273971558, "learning_rate": 2.1709007584047524e-05, "loss": 1.682, "step": 22134 }, { "epoch": 0.7927014879939835, "grad_norm": 1.7697921991348267, "learning_rate": 2.170179195794163e-05, "loss": 1.3207, "step": 22135 }, { "epoch": 0.7927373001235518, "grad_norm": 1.775586724281311, "learning_rate": 2.1694577385237104e-05, "loss": 1.291, "step": 22136 }, { "epoch": 0.7927731122531201, "grad_norm": 1.5712333917617798, "learning_rate": 2.168736386603102e-05, "loss": 1.2253, "step": 22137 }, { "epoch": 0.7928089243826885, "grad_norm": 1.932370901107788, "learning_rate": 2.168015140042048e-05, "loss": 1.6061, "step": 22138 }, { "epoch": 0.7928447365122567, "grad_norm": 1.4659613370895386, "learning_rate": 2.167293998850244e-05, "loss": 1.1571, "step": 22139 }, { "epoch": 0.792880548641825, "grad_norm": 1.5071487426757812, "learning_rate": 2.1665729630373965e-05, "loss": 1.5654, "step": 22140 }, { "epoch": 0.7929163607713933, "grad_norm": 1.8615208864212036, "learning_rate": 2.165852032613205e-05, "loss": 1.2381, "step": 22141 }, { "epoch": 0.7929521729009615, "grad_norm": 1.6738359928131104, "learning_rate": 2.1651312075873718e-05, "loss": 1.3059, "step": 22142 }, { "epoch": 0.7929879850305298, "grad_norm": 1.4147003889083862, "learning_rate": 2.1644104879695892e-05, "loss": 1.2178, "step": 22143 }, { "epoch": 0.7930237971600981, "grad_norm": 1.8694628477096558, "learning_rate": 2.1636898737695567e-05, "loss": 1.411, "step": 22144 }, { "epoch": 0.7930596092896665, "grad_norm": 1.5314850807189941, "learning_rate": 2.1629693649969683e-05, "loss": 1.3889, "step": 22145 }, { "epoch": 0.7930954214192347, "grad_norm": 1.4080699682235718, "learning_rate": 2.1622489616615203e-05, "loss": 1.335, "step": 22146 }, { "epoch": 0.793131233548803, "grad_norm": 1.9649324417114258, "learning_rate": 2.1615286637729037e-05, "loss": 1.4941, "step": 22147 }, { "epoch": 0.7931670456783713, "grad_norm": 1.2882145643234253, "learning_rate": 2.1608084713408018e-05, "loss": 1.2824, "step": 22148 }, { "epoch": 0.7932028578079395, "grad_norm": 1.4714865684509277, "learning_rate": 2.1600883843749165e-05, "loss": 1.5583, "step": 22149 }, { "epoch": 0.7932386699375078, "grad_norm": 1.5543267726898193, "learning_rate": 2.159368402884926e-05, "loss": 1.4382, "step": 22150 }, { "epoch": 0.7932744820670761, "grad_norm": 1.4382576942443848, "learning_rate": 2.1586485268805225e-05, "loss": 1.3659, "step": 22151 }, { "epoch": 0.7933102941966444, "grad_norm": 1.7490313053131104, "learning_rate": 2.1579287563713823e-05, "loss": 1.0572, "step": 22152 }, { "epoch": 0.7933461063262127, "grad_norm": 1.7107819318771362, "learning_rate": 2.1572090913672017e-05, "loss": 1.3691, "step": 22153 }, { "epoch": 0.793381918455781, "grad_norm": 2.4976677894592285, "learning_rate": 2.1564895318776534e-05, "loss": 1.4157, "step": 22154 }, { "epoch": 0.7934177305853493, "grad_norm": 1.6344050168991089, "learning_rate": 2.1557700779124214e-05, "loss": 1.4745, "step": 22155 }, { "epoch": 0.7934535427149175, "grad_norm": 2.2682435512542725, "learning_rate": 2.1550507294811863e-05, "loss": 1.4652, "step": 22156 }, { "epoch": 0.7934893548444858, "grad_norm": 1.6466939449310303, "learning_rate": 2.1543314865936225e-05, "loss": 1.5305, "step": 22157 }, { "epoch": 0.7935251669740541, "grad_norm": 1.3412882089614868, "learning_rate": 2.1536123492594106e-05, "loss": 1.2984, "step": 22158 }, { "epoch": 0.7935609791036224, "grad_norm": 1.7352933883666992, "learning_rate": 2.1528933174882183e-05, "loss": 1.6905, "step": 22159 }, { "epoch": 0.7935967912331907, "grad_norm": 1.512288212776184, "learning_rate": 2.1521743912897296e-05, "loss": 1.3645, "step": 22160 }, { "epoch": 0.793632603362759, "grad_norm": 1.663425326347351, "learning_rate": 2.1514555706736084e-05, "loss": 1.2875, "step": 22161 }, { "epoch": 0.7936684154923273, "grad_norm": 1.7665363550186157, "learning_rate": 2.1507368556495323e-05, "loss": 1.4851, "step": 22162 }, { "epoch": 0.7937042276218955, "grad_norm": 1.8287420272827148, "learning_rate": 2.150018246227161e-05, "loss": 1.4071, "step": 22163 }, { "epoch": 0.7937400397514638, "grad_norm": 1.557187557220459, "learning_rate": 2.1492997424161744e-05, "loss": 1.3892, "step": 22164 }, { "epoch": 0.7937758518810321, "grad_norm": 1.6745768785476685, "learning_rate": 2.1485813442262325e-05, "loss": 1.1793, "step": 22165 }, { "epoch": 0.7938116640106004, "grad_norm": 2.1988525390625, "learning_rate": 2.1478630516669952e-05, "loss": 1.5253, "step": 22166 }, { "epoch": 0.7938474761401687, "grad_norm": 1.7745083570480347, "learning_rate": 2.1471448647481384e-05, "loss": 1.0861, "step": 22167 }, { "epoch": 0.793883288269737, "grad_norm": 1.9180774688720703, "learning_rate": 2.1464267834793152e-05, "loss": 1.3288, "step": 22168 }, { "epoch": 0.7939191003993052, "grad_norm": 1.5834040641784668, "learning_rate": 2.1457088078701916e-05, "loss": 1.544, "step": 22169 }, { "epoch": 0.7939549125288735, "grad_norm": 1.3772550821304321, "learning_rate": 2.144990937930419e-05, "loss": 1.4562, "step": 22170 }, { "epoch": 0.7939907246584418, "grad_norm": 1.783267855644226, "learning_rate": 2.1442731736696666e-05, "loss": 1.5635, "step": 22171 }, { "epoch": 0.79402653678801, "grad_norm": 1.4774640798568726, "learning_rate": 2.143555515097583e-05, "loss": 1.3106, "step": 22172 }, { "epoch": 0.7940623489175784, "grad_norm": 1.496637225151062, "learning_rate": 2.1428379622238283e-05, "loss": 1.4957, "step": 22173 }, { "epoch": 0.7940981610471467, "grad_norm": 2.0105698108673096, "learning_rate": 2.1421205150580514e-05, "loss": 1.9181, "step": 22174 }, { "epoch": 0.794133973176715, "grad_norm": 1.443631887435913, "learning_rate": 2.1414031736099072e-05, "loss": 1.3103, "step": 22175 }, { "epoch": 0.7941697853062832, "grad_norm": 1.856333613395691, "learning_rate": 2.1406859378890486e-05, "loss": 1.6582, "step": 22176 }, { "epoch": 0.7942055974358515, "grad_norm": 1.6512917280197144, "learning_rate": 2.1399688079051205e-05, "loss": 1.1163, "step": 22177 }, { "epoch": 0.7942414095654198, "grad_norm": 1.48997962474823, "learning_rate": 2.1392517836677738e-05, "loss": 1.2154, "step": 22178 }, { "epoch": 0.794277221694988, "grad_norm": 1.7168711423873901, "learning_rate": 2.1385348651866542e-05, "loss": 1.384, "step": 22179 }, { "epoch": 0.7943130338245564, "grad_norm": 1.7471309900283813, "learning_rate": 2.13781805247141e-05, "loss": 1.5881, "step": 22180 }, { "epoch": 0.7943488459541247, "grad_norm": 1.4693900346755981, "learning_rate": 2.137101345531677e-05, "loss": 1.5126, "step": 22181 }, { "epoch": 0.794384658083693, "grad_norm": 1.2849462032318115, "learning_rate": 2.136384744377109e-05, "loss": 1.44, "step": 22182 }, { "epoch": 0.7944204702132612, "grad_norm": 1.9792394638061523, "learning_rate": 2.135668249017341e-05, "loss": 1.6862, "step": 22183 }, { "epoch": 0.7944562823428295, "grad_norm": 2.0086636543273926, "learning_rate": 2.134951859462009e-05, "loss": 1.3668, "step": 22184 }, { "epoch": 0.7944920944723978, "grad_norm": 1.3767683506011963, "learning_rate": 2.1342355757207544e-05, "loss": 1.2261, "step": 22185 }, { "epoch": 0.794527906601966, "grad_norm": 1.7332990169525146, "learning_rate": 2.1335193978032152e-05, "loss": 1.233, "step": 22186 }, { "epoch": 0.7945637187315344, "grad_norm": 1.5471128225326538, "learning_rate": 2.1328033257190272e-05, "loss": 1.2935, "step": 22187 }, { "epoch": 0.7945995308611027, "grad_norm": 1.7943533658981323, "learning_rate": 2.13208735947782e-05, "loss": 1.4358, "step": 22188 }, { "epoch": 0.794635342990671, "grad_norm": 1.4228322505950928, "learning_rate": 2.1313714990892285e-05, "loss": 1.4599, "step": 22189 }, { "epoch": 0.7946711551202392, "grad_norm": 1.6347566843032837, "learning_rate": 2.1306557445628837e-05, "loss": 1.4877, "step": 22190 }, { "epoch": 0.7947069672498075, "grad_norm": 1.4751968383789062, "learning_rate": 2.1299400959084183e-05, "loss": 1.5368, "step": 22191 }, { "epoch": 0.7947427793793758, "grad_norm": 1.509900689125061, "learning_rate": 2.1292245531354538e-05, "loss": 1.2897, "step": 22192 }, { "epoch": 0.794778591508944, "grad_norm": 1.7221992015838623, "learning_rate": 2.128509116253621e-05, "loss": 1.5262, "step": 22193 }, { "epoch": 0.7948144036385124, "grad_norm": 1.6442131996154785, "learning_rate": 2.1277937852725472e-05, "loss": 1.1642, "step": 22194 }, { "epoch": 0.7948502157680807, "grad_norm": 1.5237714052200317, "learning_rate": 2.1270785602018505e-05, "loss": 1.2724, "step": 22195 }, { "epoch": 0.794886027897649, "grad_norm": 2.1376285552978516, "learning_rate": 2.1263634410511567e-05, "loss": 1.4343, "step": 22196 }, { "epoch": 0.7949218400272172, "grad_norm": 2.232311487197876, "learning_rate": 2.125648427830086e-05, "loss": 1.6066, "step": 22197 }, { "epoch": 0.7949576521567855, "grad_norm": 1.5320496559143066, "learning_rate": 2.1249335205482613e-05, "loss": 1.4768, "step": 22198 }, { "epoch": 0.7949934642863538, "grad_norm": 1.4314378499984741, "learning_rate": 2.1242187192152964e-05, "loss": 1.3451, "step": 22199 }, { "epoch": 0.795029276415922, "grad_norm": 1.5462158918380737, "learning_rate": 2.1235040238408087e-05, "loss": 1.6463, "step": 22200 }, { "epoch": 0.7950650885454904, "grad_norm": 1.7351806163787842, "learning_rate": 2.122789434434417e-05, "loss": 1.5408, "step": 22201 }, { "epoch": 0.7951009006750587, "grad_norm": 1.6399816274642944, "learning_rate": 2.1220749510057304e-05, "loss": 1.4684, "step": 22202 }, { "epoch": 0.7951367128046269, "grad_norm": 2.056469440460205, "learning_rate": 2.1213605735643625e-05, "loss": 1.8509, "step": 22203 }, { "epoch": 0.7951725249341952, "grad_norm": 1.7393684387207031, "learning_rate": 2.1206463021199263e-05, "loss": 1.3368, "step": 22204 }, { "epoch": 0.7952083370637635, "grad_norm": 1.5154002904891968, "learning_rate": 2.1199321366820336e-05, "loss": 1.4059, "step": 22205 }, { "epoch": 0.7952441491933318, "grad_norm": 2.056178569793701, "learning_rate": 2.1192180772602867e-05, "loss": 1.588, "step": 22206 }, { "epoch": 0.7952799613229, "grad_norm": 1.605133295059204, "learning_rate": 2.1185041238642934e-05, "loss": 1.3137, "step": 22207 }, { "epoch": 0.7953157734524684, "grad_norm": 1.9872175455093384, "learning_rate": 2.117790276503665e-05, "loss": 1.3687, "step": 22208 }, { "epoch": 0.7953515855820367, "grad_norm": 1.4879945516586304, "learning_rate": 2.1170765351879985e-05, "loss": 1.5544, "step": 22209 }, { "epoch": 0.7953873977116049, "grad_norm": 1.7017463445663452, "learning_rate": 2.116362899926898e-05, "loss": 1.3034, "step": 22210 }, { "epoch": 0.7954232098411732, "grad_norm": 1.7935137748718262, "learning_rate": 2.1156493707299664e-05, "loss": 1.1258, "step": 22211 }, { "epoch": 0.7954590219707415, "grad_norm": 2.020554780960083, "learning_rate": 2.1149359476068043e-05, "loss": 1.7252, "step": 22212 }, { "epoch": 0.7954948341003097, "grad_norm": 2.046848773956299, "learning_rate": 2.1142226305670054e-05, "loss": 1.5382, "step": 22213 }, { "epoch": 0.795530646229878, "grad_norm": 1.9651658535003662, "learning_rate": 2.1135094196201698e-05, "loss": 1.5724, "step": 22214 }, { "epoch": 0.7955664583594464, "grad_norm": 1.6604115962982178, "learning_rate": 2.112796314775892e-05, "loss": 1.4821, "step": 22215 }, { "epoch": 0.7956022704890147, "grad_norm": 1.4332162141799927, "learning_rate": 2.112083316043768e-05, "loss": 1.5424, "step": 22216 }, { "epoch": 0.7956380826185829, "grad_norm": 1.3632845878601074, "learning_rate": 2.1113704234333866e-05, "loss": 1.4333, "step": 22217 }, { "epoch": 0.7956738947481512, "grad_norm": 1.6020152568817139, "learning_rate": 2.1106576369543395e-05, "loss": 1.3168, "step": 22218 }, { "epoch": 0.7957097068777195, "grad_norm": 1.4405721426010132, "learning_rate": 2.109944956616221e-05, "loss": 1.3853, "step": 22219 }, { "epoch": 0.7957455190072877, "grad_norm": 1.601324439048767, "learning_rate": 2.109232382428612e-05, "loss": 1.4282, "step": 22220 }, { "epoch": 0.795781331136856, "grad_norm": 1.5386035442352295, "learning_rate": 2.1085199144011037e-05, "loss": 1.5231, "step": 22221 }, { "epoch": 0.7958171432664244, "grad_norm": 1.4716813564300537, "learning_rate": 2.1078075525432805e-05, "loss": 1.3985, "step": 22222 }, { "epoch": 0.7958529553959927, "grad_norm": 2.0186898708343506, "learning_rate": 2.1070952968647296e-05, "loss": 1.3269, "step": 22223 }, { "epoch": 0.7958887675255609, "grad_norm": 1.4052791595458984, "learning_rate": 2.1063831473750272e-05, "loss": 1.1185, "step": 22224 }, { "epoch": 0.7959245796551292, "grad_norm": 1.8976056575775146, "learning_rate": 2.1056711040837574e-05, "loss": 1.6696, "step": 22225 }, { "epoch": 0.7959603917846975, "grad_norm": 1.5097455978393555, "learning_rate": 2.104959167000503e-05, "loss": 1.5236, "step": 22226 }, { "epoch": 0.7959962039142657, "grad_norm": 1.326099157333374, "learning_rate": 2.104247336134836e-05, "loss": 1.2501, "step": 22227 }, { "epoch": 0.796032016043834, "grad_norm": 1.6710323095321655, "learning_rate": 2.103535611496337e-05, "loss": 1.243, "step": 22228 }, { "epoch": 0.7960678281734024, "grad_norm": 2.964059829711914, "learning_rate": 2.1028239930945794e-05, "loss": 1.1575, "step": 22229 }, { "epoch": 0.7961036403029706, "grad_norm": 1.9996397495269775, "learning_rate": 2.1021124809391423e-05, "loss": 1.2896, "step": 22230 }, { "epoch": 0.7961394524325389, "grad_norm": 2.0370073318481445, "learning_rate": 2.1014010750395907e-05, "loss": 1.5759, "step": 22231 }, { "epoch": 0.7961752645621072, "grad_norm": 2.4355692863464355, "learning_rate": 2.1006897754055e-05, "loss": 1.5545, "step": 22232 }, { "epoch": 0.7962110766916755, "grad_norm": 1.777365803718567, "learning_rate": 2.099978582046438e-05, "loss": 1.3352, "step": 22233 }, { "epoch": 0.7962468888212437, "grad_norm": 1.5381382703781128, "learning_rate": 2.099267494971977e-05, "loss": 1.5469, "step": 22234 }, { "epoch": 0.796282700950812, "grad_norm": 1.5520857572555542, "learning_rate": 2.0985565141916808e-05, "loss": 1.0467, "step": 22235 }, { "epoch": 0.7963185130803804, "grad_norm": 1.8397648334503174, "learning_rate": 2.097845639715109e-05, "loss": 1.5229, "step": 22236 }, { "epoch": 0.7963543252099486, "grad_norm": 1.6012263298034668, "learning_rate": 2.0971348715518368e-05, "loss": 1.1753, "step": 22237 }, { "epoch": 0.7963901373395169, "grad_norm": 1.5385147333145142, "learning_rate": 2.0964242097114184e-05, "loss": 1.825, "step": 22238 }, { "epoch": 0.7964259494690852, "grad_norm": 1.9621472358703613, "learning_rate": 2.0957136542034172e-05, "loss": 1.3496, "step": 22239 }, { "epoch": 0.7964617615986535, "grad_norm": 1.6801329851150513, "learning_rate": 2.0950032050373925e-05, "loss": 1.2043, "step": 22240 }, { "epoch": 0.7964975737282217, "grad_norm": 2.1597633361816406, "learning_rate": 2.0942928622229064e-05, "loss": 1.5174, "step": 22241 }, { "epoch": 0.79653338585779, "grad_norm": 1.5973130464553833, "learning_rate": 2.093582625769509e-05, "loss": 1.7511, "step": 22242 }, { "epoch": 0.7965691979873584, "grad_norm": 2.0921790599823, "learning_rate": 2.0928724956867585e-05, "loss": 1.3482, "step": 22243 }, { "epoch": 0.7966050101169266, "grad_norm": 2.008749008178711, "learning_rate": 2.0921624719842126e-05, "loss": 1.5626, "step": 22244 }, { "epoch": 0.7966408222464949, "grad_norm": 1.3919188976287842, "learning_rate": 2.091452554671417e-05, "loss": 1.3621, "step": 22245 }, { "epoch": 0.7966766343760632, "grad_norm": 2.2559168338775635, "learning_rate": 2.0907427437579287e-05, "loss": 1.3592, "step": 22246 }, { "epoch": 0.7967124465056314, "grad_norm": 1.4418983459472656, "learning_rate": 2.0900330392532895e-05, "loss": 1.529, "step": 22247 }, { "epoch": 0.7967482586351997, "grad_norm": 1.5940090417861938, "learning_rate": 2.089323441167058e-05, "loss": 1.6447, "step": 22248 }, { "epoch": 0.796784070764768, "grad_norm": 1.591691255569458, "learning_rate": 2.088613949508772e-05, "loss": 1.244, "step": 22249 }, { "epoch": 0.7968198828943364, "grad_norm": 1.6028300523757935, "learning_rate": 2.0879045642879814e-05, "loss": 1.3213, "step": 22250 }, { "epoch": 0.7968556950239046, "grad_norm": 1.5213801860809326, "learning_rate": 2.0871952855142286e-05, "loss": 1.2998, "step": 22251 }, { "epoch": 0.7968915071534729, "grad_norm": 3.0959925651550293, "learning_rate": 2.0864861131970594e-05, "loss": 1.4941, "step": 22252 }, { "epoch": 0.7969273192830412, "grad_norm": 1.3404817581176758, "learning_rate": 2.0857770473460115e-05, "loss": 1.2366, "step": 22253 }, { "epoch": 0.7969631314126094, "grad_norm": 2.0209555625915527, "learning_rate": 2.08506808797062e-05, "loss": 1.3125, "step": 22254 }, { "epoch": 0.7969989435421777, "grad_norm": 1.8329658508300781, "learning_rate": 2.084359235080433e-05, "loss": 1.4451, "step": 22255 }, { "epoch": 0.797034755671746, "grad_norm": 1.6439001560211182, "learning_rate": 2.0836504886849785e-05, "loss": 1.7872, "step": 22256 }, { "epoch": 0.7970705678013144, "grad_norm": 2.4358198642730713, "learning_rate": 2.082941848793799e-05, "loss": 1.4332, "step": 22257 }, { "epoch": 0.7971063799308826, "grad_norm": 1.5745573043823242, "learning_rate": 2.0822333154164187e-05, "loss": 1.1289, "step": 22258 }, { "epoch": 0.7971421920604509, "grad_norm": 1.8613457679748535, "learning_rate": 2.0815248885623817e-05, "loss": 1.2301, "step": 22259 }, { "epoch": 0.7971780041900192, "grad_norm": 1.6447163820266724, "learning_rate": 2.0808165682412107e-05, "loss": 1.2927, "step": 22260 }, { "epoch": 0.7972138163195874, "grad_norm": 2.150489568710327, "learning_rate": 2.08010835446244e-05, "loss": 1.5737, "step": 22261 }, { "epoch": 0.7972496284491557, "grad_norm": 2.0756678581237793, "learning_rate": 2.0794002472355933e-05, "loss": 1.6523, "step": 22262 }, { "epoch": 0.797285440578724, "grad_norm": 1.8237303495407104, "learning_rate": 2.0786922465701997e-05, "loss": 1.5846, "step": 22263 }, { "epoch": 0.7973212527082923, "grad_norm": 1.4604321718215942, "learning_rate": 2.0779843524757858e-05, "loss": 1.128, "step": 22264 }, { "epoch": 0.7973570648378606, "grad_norm": 1.7635831832885742, "learning_rate": 2.0772765649618688e-05, "loss": 1.3801, "step": 22265 }, { "epoch": 0.7973928769674289, "grad_norm": 2.052753448486328, "learning_rate": 2.0765688840379816e-05, "loss": 1.3685, "step": 22266 }, { "epoch": 0.7974286890969972, "grad_norm": 1.3673149347305298, "learning_rate": 2.075861309713637e-05, "loss": 0.9523, "step": 22267 }, { "epoch": 0.7974645012265654, "grad_norm": 2.1413803100585938, "learning_rate": 2.0751538419983598e-05, "loss": 1.1637, "step": 22268 }, { "epoch": 0.7975003133561337, "grad_norm": 1.6249077320098877, "learning_rate": 2.0744464809016593e-05, "loss": 1.676, "step": 22269 }, { "epoch": 0.797536125485702, "grad_norm": 1.6026825904846191, "learning_rate": 2.0737392264330635e-05, "loss": 1.188, "step": 22270 }, { "epoch": 0.7975719376152703, "grad_norm": 1.4662625789642334, "learning_rate": 2.073032078602083e-05, "loss": 1.4825, "step": 22271 }, { "epoch": 0.7976077497448386, "grad_norm": 1.4503960609436035, "learning_rate": 2.0723250374182278e-05, "loss": 1.6084, "step": 22272 }, { "epoch": 0.7976435618744069, "grad_norm": 1.5225666761398315, "learning_rate": 2.071618102891013e-05, "loss": 1.307, "step": 22273 }, { "epoch": 0.7976793740039752, "grad_norm": 1.6680306196212769, "learning_rate": 2.070911275029951e-05, "loss": 1.533, "step": 22274 }, { "epoch": 0.7977151861335434, "grad_norm": 2.219346761703491, "learning_rate": 2.0702045538445515e-05, "loss": 1.7578, "step": 22275 }, { "epoch": 0.7977509982631117, "grad_norm": 1.6483958959579468, "learning_rate": 2.069497939344316e-05, "loss": 1.3416, "step": 22276 }, { "epoch": 0.79778681039268, "grad_norm": 1.3028994798660278, "learning_rate": 2.0687914315387613e-05, "loss": 1.5492, "step": 22277 }, { "epoch": 0.7978226225222483, "grad_norm": 1.6726864576339722, "learning_rate": 2.0680850304373843e-05, "loss": 1.4142, "step": 22278 }, { "epoch": 0.7978584346518166, "grad_norm": 1.879017949104309, "learning_rate": 2.0673787360496954e-05, "loss": 1.5515, "step": 22279 }, { "epoch": 0.7978942467813849, "grad_norm": 1.7818199396133423, "learning_rate": 2.06667254838519e-05, "loss": 1.4629, "step": 22280 }, { "epoch": 0.7979300589109531, "grad_norm": 1.6430115699768066, "learning_rate": 2.0659664674533728e-05, "loss": 1.6182, "step": 22281 }, { "epoch": 0.7979658710405214, "grad_norm": 2.086963176727295, "learning_rate": 2.065260493263744e-05, "loss": 1.5188, "step": 22282 }, { "epoch": 0.7980016831700897, "grad_norm": 1.5534586906433105, "learning_rate": 2.0645546258257987e-05, "loss": 1.5905, "step": 22283 }, { "epoch": 0.798037495299658, "grad_norm": 1.8073506355285645, "learning_rate": 2.063848865149035e-05, "loss": 1.5429, "step": 22284 }, { "epoch": 0.7980733074292263, "grad_norm": 1.6143696308135986, "learning_rate": 2.0631432112429473e-05, "loss": 1.5414, "step": 22285 }, { "epoch": 0.7981091195587946, "grad_norm": 1.9073354005813599, "learning_rate": 2.062437664117033e-05, "loss": 1.6396, "step": 22286 }, { "epoch": 0.7981449316883629, "grad_norm": 1.7292273044586182, "learning_rate": 2.061732223780778e-05, "loss": 1.3851, "step": 22287 }, { "epoch": 0.7981807438179311, "grad_norm": 1.2550760507583618, "learning_rate": 2.061026890243677e-05, "loss": 1.3337, "step": 22288 }, { "epoch": 0.7982165559474994, "grad_norm": 1.5007202625274658, "learning_rate": 2.06032166351522e-05, "loss": 1.1814, "step": 22289 }, { "epoch": 0.7982523680770677, "grad_norm": 1.4860681295394897, "learning_rate": 2.059616543604892e-05, "loss": 1.4702, "step": 22290 }, { "epoch": 0.7982881802066359, "grad_norm": 1.7105454206466675, "learning_rate": 2.0589115305221807e-05, "loss": 1.556, "step": 22291 }, { "epoch": 0.7983239923362043, "grad_norm": 1.458163857460022, "learning_rate": 2.058206624276572e-05, "loss": 1.5388, "step": 22292 }, { "epoch": 0.7983598044657726, "grad_norm": 1.4146854877471924, "learning_rate": 2.0575018248775513e-05, "loss": 1.6969, "step": 22293 }, { "epoch": 0.7983956165953409, "grad_norm": 1.3511170148849487, "learning_rate": 2.056797132334596e-05, "loss": 1.4063, "step": 22294 }, { "epoch": 0.7984314287249091, "grad_norm": 1.6224746704101562, "learning_rate": 2.056092546657189e-05, "loss": 1.4332, "step": 22295 }, { "epoch": 0.7984672408544774, "grad_norm": 1.4703950881958008, "learning_rate": 2.0553880678548097e-05, "loss": 1.3809, "step": 22296 }, { "epoch": 0.7985030529840457, "grad_norm": 1.9702130556106567, "learning_rate": 2.0546836959369387e-05, "loss": 1.431, "step": 22297 }, { "epoch": 0.7985388651136139, "grad_norm": 1.900384783744812, "learning_rate": 2.0539794309130478e-05, "loss": 1.1501, "step": 22298 }, { "epoch": 0.7985746772431823, "grad_norm": 1.9203412532806396, "learning_rate": 2.0532752727926142e-05, "loss": 1.6926, "step": 22299 }, { "epoch": 0.7986104893727506, "grad_norm": 1.5627378225326538, "learning_rate": 2.0525712215851132e-05, "loss": 1.3586, "step": 22300 }, { "epoch": 0.7986463015023189, "grad_norm": 1.3566069602966309, "learning_rate": 2.0518672773000124e-05, "loss": 1.417, "step": 22301 }, { "epoch": 0.7986821136318871, "grad_norm": 2.314469814300537, "learning_rate": 2.051163439946786e-05, "loss": 1.7983, "step": 22302 }, { "epoch": 0.7987179257614554, "grad_norm": 1.9831676483154297, "learning_rate": 2.050459709534901e-05, "loss": 1.6987, "step": 22303 }, { "epoch": 0.7987537378910237, "grad_norm": 2.6953423023223877, "learning_rate": 2.0497560860738295e-05, "loss": 1.2652, "step": 22304 }, { "epoch": 0.7987895500205919, "grad_norm": 1.602420449256897, "learning_rate": 2.0490525695730323e-05, "loss": 1.4029, "step": 22305 }, { "epoch": 0.7988253621501603, "grad_norm": 1.531333565711975, "learning_rate": 2.048349160041977e-05, "loss": 1.3618, "step": 22306 }, { "epoch": 0.7988611742797286, "grad_norm": 1.4855061769485474, "learning_rate": 2.0476458574901293e-05, "loss": 1.2672, "step": 22307 }, { "epoch": 0.7988969864092968, "grad_norm": 2.015868663787842, "learning_rate": 2.046942661926946e-05, "loss": 1.4248, "step": 22308 }, { "epoch": 0.7989327985388651, "grad_norm": 1.3681994676589966, "learning_rate": 2.04623957336189e-05, "loss": 1.1357, "step": 22309 }, { "epoch": 0.7989686106684334, "grad_norm": 2.770206928253174, "learning_rate": 2.0455365918044224e-05, "loss": 1.257, "step": 22310 }, { "epoch": 0.7990044227980017, "grad_norm": 1.8386939764022827, "learning_rate": 2.044833717264001e-05, "loss": 1.4291, "step": 22311 }, { "epoch": 0.7990402349275699, "grad_norm": 1.7308319807052612, "learning_rate": 2.044130949750077e-05, "loss": 1.3881, "step": 22312 }, { "epoch": 0.7990760470571383, "grad_norm": 2.2315123081207275, "learning_rate": 2.04342828927211e-05, "loss": 1.3908, "step": 22313 }, { "epoch": 0.7991118591867066, "grad_norm": 1.8249056339263916, "learning_rate": 2.0427257358395546e-05, "loss": 1.5637, "step": 22314 }, { "epoch": 0.7991476713162748, "grad_norm": 1.5045298337936401, "learning_rate": 2.0420232894618573e-05, "loss": 1.42, "step": 22315 }, { "epoch": 0.7991834834458431, "grad_norm": 1.8735840320587158, "learning_rate": 2.041320950148472e-05, "loss": 1.581, "step": 22316 }, { "epoch": 0.7992192955754114, "grad_norm": 1.6931583881378174, "learning_rate": 2.0406187179088477e-05, "loss": 1.4165, "step": 22317 }, { "epoch": 0.7992551077049797, "grad_norm": 2.089118719100952, "learning_rate": 2.0399165927524334e-05, "loss": 1.4857, "step": 22318 }, { "epoch": 0.7992909198345479, "grad_norm": 1.5633699893951416, "learning_rate": 2.0392145746886714e-05, "loss": 1.5717, "step": 22319 }, { "epoch": 0.7993267319641163, "grad_norm": 1.8018674850463867, "learning_rate": 2.038512663727009e-05, "loss": 1.4133, "step": 22320 }, { "epoch": 0.7993625440936846, "grad_norm": 1.3811540603637695, "learning_rate": 2.0378108598768887e-05, "loss": 1.2667, "step": 22321 }, { "epoch": 0.7993983562232528, "grad_norm": 1.6007475852966309, "learning_rate": 2.0371091631477557e-05, "loss": 1.2712, "step": 22322 }, { "epoch": 0.7994341683528211, "grad_norm": 1.5358997583389282, "learning_rate": 2.036407573549044e-05, "loss": 1.5853, "step": 22323 }, { "epoch": 0.7994699804823894, "grad_norm": 2.150416374206543, "learning_rate": 2.035706091090197e-05, "loss": 1.3255, "step": 22324 }, { "epoch": 0.7995057926119576, "grad_norm": 1.9435093402862549, "learning_rate": 2.035004715780654e-05, "loss": 1.1444, "step": 22325 }, { "epoch": 0.7995416047415259, "grad_norm": 1.7371599674224854, "learning_rate": 2.0343034476298452e-05, "loss": 1.5803, "step": 22326 }, { "epoch": 0.7995774168710943, "grad_norm": 1.5360907316207886, "learning_rate": 2.0336022866472092e-05, "loss": 1.5967, "step": 22327 }, { "epoch": 0.7996132290006626, "grad_norm": 1.229755163192749, "learning_rate": 2.0329012328421783e-05, "loss": 1.2461, "step": 22328 }, { "epoch": 0.7996490411302308, "grad_norm": 1.8806687593460083, "learning_rate": 2.0322002862241863e-05, "loss": 1.5251, "step": 22329 }, { "epoch": 0.7996848532597991, "grad_norm": 1.3057172298431396, "learning_rate": 2.0314994468026606e-05, "loss": 1.4201, "step": 22330 }, { "epoch": 0.7997206653893674, "grad_norm": 1.5392056703567505, "learning_rate": 2.03079871458703e-05, "loss": 1.6202, "step": 22331 }, { "epoch": 0.7997564775189356, "grad_norm": 1.719313621520996, "learning_rate": 2.0300980895867263e-05, "loss": 1.3479, "step": 22332 }, { "epoch": 0.7997922896485039, "grad_norm": 1.619842767715454, "learning_rate": 2.029397571811169e-05, "loss": 1.5288, "step": 22333 }, { "epoch": 0.7998281017780723, "grad_norm": 2.1151888370513916, "learning_rate": 2.0286971612697902e-05, "loss": 1.6217, "step": 22334 }, { "epoch": 0.7998639139076406, "grad_norm": 1.7302162647247314, "learning_rate": 2.027996857972002e-05, "loss": 1.2988, "step": 22335 }, { "epoch": 0.7998997260372088, "grad_norm": 1.6297879219055176, "learning_rate": 2.0272966619272392e-05, "loss": 1.0922, "step": 22336 }, { "epoch": 0.7999355381667771, "grad_norm": 1.6117581129074097, "learning_rate": 2.026596573144913e-05, "loss": 0.9956, "step": 22337 }, { "epoch": 0.7999713502963454, "grad_norm": 1.5663176774978638, "learning_rate": 2.025896591634444e-05, "loss": 1.3235, "step": 22338 }, { "epoch": 0.8000071624259136, "grad_norm": 1.4366532564163208, "learning_rate": 2.0251967174052523e-05, "loss": 1.0999, "step": 22339 }, { "epoch": 0.8000429745554819, "grad_norm": 1.7538988590240479, "learning_rate": 2.024496950466753e-05, "loss": 1.7191, "step": 22340 }, { "epoch": 0.8000787866850503, "grad_norm": 1.5960266590118408, "learning_rate": 2.023797290828361e-05, "loss": 1.3942, "step": 22341 }, { "epoch": 0.8001145988146185, "grad_norm": 1.8154847621917725, "learning_rate": 2.0230977384994808e-05, "loss": 1.1857, "step": 22342 }, { "epoch": 0.8001504109441868, "grad_norm": 1.5289015769958496, "learning_rate": 2.022398293489538e-05, "loss": 1.6854, "step": 22343 }, { "epoch": 0.8001862230737551, "grad_norm": 1.8163173198699951, "learning_rate": 2.0216989558079326e-05, "loss": 1.5605, "step": 22344 }, { "epoch": 0.8002220352033234, "grad_norm": 1.35292387008667, "learning_rate": 2.020999725464079e-05, "loss": 1.3525, "step": 22345 }, { "epoch": 0.8002578473328916, "grad_norm": 1.5341395139694214, "learning_rate": 2.0203006024673764e-05, "loss": 1.326, "step": 22346 }, { "epoch": 0.8002936594624599, "grad_norm": 1.8680927753448486, "learning_rate": 2.0196015868272412e-05, "loss": 1.5703, "step": 22347 }, { "epoch": 0.8003294715920283, "grad_norm": 1.65446937084198, "learning_rate": 2.0189026785530705e-05, "loss": 1.2258, "step": 22348 }, { "epoch": 0.8003652837215965, "grad_norm": 2.1270790100097656, "learning_rate": 2.01820387765427e-05, "loss": 1.492, "step": 22349 }, { "epoch": 0.8004010958511648, "grad_norm": 1.5399971008300781, "learning_rate": 2.0175051841402426e-05, "loss": 1.5374, "step": 22350 }, { "epoch": 0.8004369079807331, "grad_norm": 1.526645302772522, "learning_rate": 2.016806598020383e-05, "loss": 1.2816, "step": 22351 }, { "epoch": 0.8004727201103014, "grad_norm": 1.623569130897522, "learning_rate": 2.0161081193040964e-05, "loss": 1.244, "step": 22352 }, { "epoch": 0.8005085322398696, "grad_norm": 1.273235559463501, "learning_rate": 2.0154097480007716e-05, "loss": 1.1387, "step": 22353 }, { "epoch": 0.8005443443694379, "grad_norm": 1.4888707399368286, "learning_rate": 2.0147114841198144e-05, "loss": 1.4681, "step": 22354 }, { "epoch": 0.8005801564990063, "grad_norm": 2.1212549209594727, "learning_rate": 2.014013327670611e-05, "loss": 1.2677, "step": 22355 }, { "epoch": 0.8006159686285745, "grad_norm": 2.3081042766571045, "learning_rate": 2.0133152786625598e-05, "loss": 1.5632, "step": 22356 }, { "epoch": 0.8006517807581428, "grad_norm": 1.2556418180465698, "learning_rate": 2.012617337105044e-05, "loss": 1.5076, "step": 22357 }, { "epoch": 0.8006875928877111, "grad_norm": 1.5202817916870117, "learning_rate": 2.0119195030074645e-05, "loss": 1.3706, "step": 22358 }, { "epoch": 0.8007234050172793, "grad_norm": 1.4703209400177002, "learning_rate": 2.011221776379204e-05, "loss": 1.6011, "step": 22359 }, { "epoch": 0.8007592171468476, "grad_norm": 2.0287888050079346, "learning_rate": 2.0105241572296463e-05, "loss": 1.3434, "step": 22360 }, { "epoch": 0.8007950292764159, "grad_norm": 1.8298935890197754, "learning_rate": 2.0098266455681812e-05, "loss": 1.6273, "step": 22361 }, { "epoch": 0.8008308414059843, "grad_norm": 1.9720675945281982, "learning_rate": 2.009129241404192e-05, "loss": 1.537, "step": 22362 }, { "epoch": 0.8008666535355525, "grad_norm": 1.583101749420166, "learning_rate": 2.0084319447470645e-05, "loss": 1.4068, "step": 22363 }, { "epoch": 0.8009024656651208, "grad_norm": 2.1829562187194824, "learning_rate": 2.007734755606171e-05, "loss": 1.4279, "step": 22364 }, { "epoch": 0.8009382777946891, "grad_norm": 1.5320464372634888, "learning_rate": 2.0070376739909024e-05, "loss": 1.4431, "step": 22365 }, { "epoch": 0.8009740899242573, "grad_norm": 1.512054443359375, "learning_rate": 2.0063406999106293e-05, "loss": 1.4185, "step": 22366 }, { "epoch": 0.8010099020538256, "grad_norm": 1.5620276927947998, "learning_rate": 2.005643833374733e-05, "loss": 1.3393, "step": 22367 }, { "epoch": 0.8010457141833939, "grad_norm": 1.6267145872116089, "learning_rate": 2.0049470743925845e-05, "loss": 1.2916, "step": 22368 }, { "epoch": 0.8010815263129623, "grad_norm": 1.743434190750122, "learning_rate": 2.0042504229735604e-05, "loss": 1.4186, "step": 22369 }, { "epoch": 0.8011173384425305, "grad_norm": 1.7056375741958618, "learning_rate": 2.0035538791270358e-05, "loss": 1.3937, "step": 22370 }, { "epoch": 0.8011531505720988, "grad_norm": 1.7276453971862793, "learning_rate": 2.002857442862377e-05, "loss": 1.4292, "step": 22371 }, { "epoch": 0.8011889627016671, "grad_norm": 1.376869797706604, "learning_rate": 2.002161114188955e-05, "loss": 1.5486, "step": 22372 }, { "epoch": 0.8012247748312353, "grad_norm": 1.5992333889007568, "learning_rate": 2.0014648931161386e-05, "loss": 1.3535, "step": 22373 }, { "epoch": 0.8012605869608036, "grad_norm": 1.8465687036514282, "learning_rate": 2.000768779653298e-05, "loss": 1.1694, "step": 22374 }, { "epoch": 0.8012963990903719, "grad_norm": 1.4649271965026855, "learning_rate": 2.000072773809789e-05, "loss": 1.6326, "step": 22375 }, { "epoch": 0.8013322112199402, "grad_norm": 1.352648377418518, "learning_rate": 1.9993768755949882e-05, "loss": 1.5192, "step": 22376 }, { "epoch": 0.8013680233495085, "grad_norm": 1.7649281024932861, "learning_rate": 1.99868108501825e-05, "loss": 1.1998, "step": 22377 }, { "epoch": 0.8014038354790768, "grad_norm": 1.4050350189208984, "learning_rate": 1.9979854020889356e-05, "loss": 1.4443, "step": 22378 }, { "epoch": 0.8014396476086451, "grad_norm": 1.8340182304382324, "learning_rate": 1.9972898268164052e-05, "loss": 1.5995, "step": 22379 }, { "epoch": 0.8014754597382133, "grad_norm": 1.7190074920654297, "learning_rate": 1.9965943592100166e-05, "loss": 1.402, "step": 22380 }, { "epoch": 0.8015112718677816, "grad_norm": 2.5135326385498047, "learning_rate": 1.995898999279131e-05, "loss": 1.2719, "step": 22381 }, { "epoch": 0.8015470839973499, "grad_norm": 2.2622857093811035, "learning_rate": 1.9952037470330964e-05, "loss": 1.7018, "step": 22382 }, { "epoch": 0.8015828961269181, "grad_norm": 1.4016941785812378, "learning_rate": 1.994508602481271e-05, "loss": 1.4652, "step": 22383 }, { "epoch": 0.8016187082564865, "grad_norm": 1.9714092016220093, "learning_rate": 1.993813565633005e-05, "loss": 1.7556, "step": 22384 }, { "epoch": 0.8016545203860548, "grad_norm": 1.8598322868347168, "learning_rate": 1.993118636497654e-05, "loss": 1.633, "step": 22385 }, { "epoch": 0.801690332515623, "grad_norm": 1.6280977725982666, "learning_rate": 1.99242381508456e-05, "loss": 1.2321, "step": 22386 }, { "epoch": 0.8017261446451913, "grad_norm": 1.4744327068328857, "learning_rate": 1.9917291014030747e-05, "loss": 1.1498, "step": 22387 }, { "epoch": 0.8017619567747596, "grad_norm": 1.4115815162658691, "learning_rate": 1.991034495462547e-05, "loss": 1.2791, "step": 22388 }, { "epoch": 0.8017977689043279, "grad_norm": 1.562197208404541, "learning_rate": 1.990339997272317e-05, "loss": 1.2146, "step": 22389 }, { "epoch": 0.8018335810338961, "grad_norm": 2.2312514781951904, "learning_rate": 1.9896456068417302e-05, "loss": 1.4709, "step": 22390 }, { "epoch": 0.8018693931634645, "grad_norm": 1.673010230064392, "learning_rate": 1.9889513241801295e-05, "loss": 1.3413, "step": 22391 }, { "epoch": 0.8019052052930328, "grad_norm": 1.657200813293457, "learning_rate": 1.988257149296857e-05, "loss": 1.7043, "step": 22392 }, { "epoch": 0.801941017422601, "grad_norm": 1.4257102012634277, "learning_rate": 1.987563082201249e-05, "loss": 1.4941, "step": 22393 }, { "epoch": 0.8019768295521693, "grad_norm": 1.373645544052124, "learning_rate": 1.9868691229026437e-05, "loss": 1.5311, "step": 22394 }, { "epoch": 0.8020126416817376, "grad_norm": 1.3798229694366455, "learning_rate": 1.986175271410381e-05, "loss": 1.1315, "step": 22395 }, { "epoch": 0.8020484538113059, "grad_norm": 1.657395839691162, "learning_rate": 1.9854815277337902e-05, "loss": 1.2784, "step": 22396 }, { "epoch": 0.8020842659408741, "grad_norm": 2.0209593772888184, "learning_rate": 1.9847878918822073e-05, "loss": 1.2659, "step": 22397 }, { "epoch": 0.8021200780704425, "grad_norm": 1.9298338890075684, "learning_rate": 1.9840943638649635e-05, "loss": 1.2645, "step": 22398 }, { "epoch": 0.8021558902000108, "grad_norm": 1.6281800270080566, "learning_rate": 1.9834009436913948e-05, "loss": 1.4317, "step": 22399 }, { "epoch": 0.802191702329579, "grad_norm": 1.7945187091827393, "learning_rate": 1.9827076313708216e-05, "loss": 1.671, "step": 22400 }, { "epoch": 0.8022275144591473, "grad_norm": 1.9173634052276611, "learning_rate": 1.9820144269125763e-05, "loss": 1.6113, "step": 22401 }, { "epoch": 0.8022633265887156, "grad_norm": 1.6076704263687134, "learning_rate": 1.981321330325987e-05, "loss": 1.5395, "step": 22402 }, { "epoch": 0.8022991387182838, "grad_norm": 1.4223419427871704, "learning_rate": 1.980628341620373e-05, "loss": 1.3787, "step": 22403 }, { "epoch": 0.8023349508478521, "grad_norm": 1.6627603769302368, "learning_rate": 1.9799354608050614e-05, "loss": 1.4402, "step": 22404 }, { "epoch": 0.8023707629774205, "grad_norm": 1.7014139890670776, "learning_rate": 1.979242687889372e-05, "loss": 1.4458, "step": 22405 }, { "epoch": 0.8024065751069888, "grad_norm": 1.6976075172424316, "learning_rate": 1.9785500228826292e-05, "loss": 1.6349, "step": 22406 }, { "epoch": 0.802442387236557, "grad_norm": 1.68596351146698, "learning_rate": 1.977857465794146e-05, "loss": 1.7244, "step": 22407 }, { "epoch": 0.8024781993661253, "grad_norm": 2.4121077060699463, "learning_rate": 1.977165016633242e-05, "loss": 1.5179, "step": 22408 }, { "epoch": 0.8025140114956936, "grad_norm": 2.1352639198303223, "learning_rate": 1.9764726754092354e-05, "loss": 1.5501, "step": 22409 }, { "epoch": 0.8025498236252618, "grad_norm": 1.6876717805862427, "learning_rate": 1.975780442131442e-05, "loss": 1.5704, "step": 22410 }, { "epoch": 0.8025856357548301, "grad_norm": 1.326467752456665, "learning_rate": 1.9750883168091684e-05, "loss": 1.539, "step": 22411 }, { "epoch": 0.8026214478843985, "grad_norm": 1.339929223060608, "learning_rate": 1.9743962994517316e-05, "loss": 1.4927, "step": 22412 }, { "epoch": 0.8026572600139668, "grad_norm": 1.7197191715240479, "learning_rate": 1.9737043900684416e-05, "loss": 1.3768, "step": 22413 }, { "epoch": 0.802693072143535, "grad_norm": 1.7365750074386597, "learning_rate": 1.9730125886686033e-05, "loss": 1.2812, "step": 22414 }, { "epoch": 0.8027288842731033, "grad_norm": 1.6885199546813965, "learning_rate": 1.972320895261528e-05, "loss": 1.4585, "step": 22415 }, { "epoch": 0.8027646964026716, "grad_norm": 1.6332989931106567, "learning_rate": 1.9716293098565186e-05, "loss": 1.4209, "step": 22416 }, { "epoch": 0.8028005085322398, "grad_norm": 1.5932855606079102, "learning_rate": 1.9709378324628848e-05, "loss": 1.5682, "step": 22417 }, { "epoch": 0.8028363206618081, "grad_norm": 1.284571886062622, "learning_rate": 1.970246463089922e-05, "loss": 1.3149, "step": 22418 }, { "epoch": 0.8028721327913765, "grad_norm": 1.5631963014602661, "learning_rate": 1.9695552017469364e-05, "loss": 1.5476, "step": 22419 }, { "epoch": 0.8029079449209447, "grad_norm": 1.6943448781967163, "learning_rate": 1.9688640484432287e-05, "loss": 1.3432, "step": 22420 }, { "epoch": 0.802943757050513, "grad_norm": 1.4390662908554077, "learning_rate": 1.968173003188094e-05, "loss": 1.2311, "step": 22421 }, { "epoch": 0.8029795691800813, "grad_norm": 1.7541425228118896, "learning_rate": 1.96748206599083e-05, "loss": 1.4092, "step": 22422 }, { "epoch": 0.8030153813096496, "grad_norm": 1.399704098701477, "learning_rate": 1.9667912368607344e-05, "loss": 1.3498, "step": 22423 }, { "epoch": 0.8030511934392178, "grad_norm": 2.5454680919647217, "learning_rate": 1.9661005158071033e-05, "loss": 1.2805, "step": 22424 }, { "epoch": 0.8030870055687861, "grad_norm": 2.0977659225463867, "learning_rate": 1.965409902839225e-05, "loss": 1.7922, "step": 22425 }, { "epoch": 0.8031228176983545, "grad_norm": 1.8387500047683716, "learning_rate": 1.9647193979663915e-05, "loss": 1.3484, "step": 22426 }, { "epoch": 0.8031586298279227, "grad_norm": 1.7446051836013794, "learning_rate": 1.9640290011978935e-05, "loss": 1.3899, "step": 22427 }, { "epoch": 0.803194441957491, "grad_norm": 2.0197689533233643, "learning_rate": 1.9633387125430226e-05, "loss": 1.6515, "step": 22428 }, { "epoch": 0.8032302540870593, "grad_norm": 1.3845274448394775, "learning_rate": 1.9626485320110632e-05, "loss": 1.3537, "step": 22429 }, { "epoch": 0.8032660662166276, "grad_norm": 1.8746153116226196, "learning_rate": 1.961958459611295e-05, "loss": 1.1188, "step": 22430 }, { "epoch": 0.8033018783461958, "grad_norm": 1.311545491218567, "learning_rate": 1.9612684953530124e-05, "loss": 1.3475, "step": 22431 }, { "epoch": 0.8033376904757641, "grad_norm": 2.6298389434814453, "learning_rate": 1.9605786392454904e-05, "loss": 1.6516, "step": 22432 }, { "epoch": 0.8033735026053325, "grad_norm": 1.625884771347046, "learning_rate": 1.9598888912980117e-05, "loss": 1.1758, "step": 22433 }, { "epoch": 0.8034093147349007, "grad_norm": 2.10892391204834, "learning_rate": 1.9591992515198588e-05, "loss": 1.4329, "step": 22434 }, { "epoch": 0.803445126864469, "grad_norm": 1.5895978212356567, "learning_rate": 1.9585097199203096e-05, "loss": 1.466, "step": 22435 }, { "epoch": 0.8034809389940373, "grad_norm": 1.5277785062789917, "learning_rate": 1.957820296508637e-05, "loss": 1.7112, "step": 22436 }, { "epoch": 0.8035167511236055, "grad_norm": 2.111467123031616, "learning_rate": 1.9571309812941184e-05, "loss": 1.1178, "step": 22437 }, { "epoch": 0.8035525632531738, "grad_norm": 1.8915823698043823, "learning_rate": 1.9564417742860307e-05, "loss": 1.5494, "step": 22438 }, { "epoch": 0.8035883753827421, "grad_norm": 1.9859658479690552, "learning_rate": 1.9557526754936405e-05, "loss": 1.6199, "step": 22439 }, { "epoch": 0.8036241875123105, "grad_norm": 1.3071140050888062, "learning_rate": 1.955063684926225e-05, "loss": 1.3199, "step": 22440 }, { "epoch": 0.8036599996418787, "grad_norm": 1.539048194885254, "learning_rate": 1.9543748025930452e-05, "loss": 1.3173, "step": 22441 }, { "epoch": 0.803695811771447, "grad_norm": 1.7450897693634033, "learning_rate": 1.9536860285033797e-05, "loss": 1.5985, "step": 22442 }, { "epoch": 0.8037316239010153, "grad_norm": 1.7976315021514893, "learning_rate": 1.9529973626664865e-05, "loss": 1.4624, "step": 22443 }, { "epoch": 0.8037674360305835, "grad_norm": 2.065857172012329, "learning_rate": 1.952308805091636e-05, "loss": 1.5356, "step": 22444 }, { "epoch": 0.8038032481601518, "grad_norm": 2.313326597213745, "learning_rate": 1.9516203557880852e-05, "loss": 1.5283, "step": 22445 }, { "epoch": 0.8038390602897201, "grad_norm": 2.275071859359741, "learning_rate": 1.9509320147651068e-05, "loss": 1.4142, "step": 22446 }, { "epoch": 0.8038748724192885, "grad_norm": 2.0012121200561523, "learning_rate": 1.950243782031954e-05, "loss": 1.5083, "step": 22447 }, { "epoch": 0.8039106845488567, "grad_norm": 1.4849156141281128, "learning_rate": 1.9495556575978836e-05, "loss": 1.3491, "step": 22448 }, { "epoch": 0.803946496678425, "grad_norm": 1.4966169595718384, "learning_rate": 1.948867641472163e-05, "loss": 1.4678, "step": 22449 }, { "epoch": 0.8039823088079933, "grad_norm": 1.564099669456482, "learning_rate": 1.9481797336640396e-05, "loss": 1.1639, "step": 22450 }, { "epoch": 0.8040181209375615, "grad_norm": 1.6799238920211792, "learning_rate": 1.9474919341827746e-05, "loss": 1.2108, "step": 22451 }, { "epoch": 0.8040539330671298, "grad_norm": 1.8090828657150269, "learning_rate": 1.946804243037613e-05, "loss": 1.3893, "step": 22452 }, { "epoch": 0.8040897451966981, "grad_norm": 1.8733340501785278, "learning_rate": 1.9461166602378176e-05, "loss": 1.6914, "step": 22453 }, { "epoch": 0.8041255573262664, "grad_norm": 1.3128604888916016, "learning_rate": 1.9454291857926323e-05, "loss": 1.5375, "step": 22454 }, { "epoch": 0.8041613694558347, "grad_norm": 1.3559684753417969, "learning_rate": 1.94474181971131e-05, "loss": 1.5298, "step": 22455 }, { "epoch": 0.804197181585403, "grad_norm": 1.790544033050537, "learning_rate": 1.9440545620030924e-05, "loss": 1.469, "step": 22456 }, { "epoch": 0.8042329937149713, "grad_norm": 1.5944311618804932, "learning_rate": 1.9433674126772306e-05, "loss": 1.4043, "step": 22457 }, { "epoch": 0.8042688058445395, "grad_norm": 1.7077056169509888, "learning_rate": 1.9426803717429696e-05, "loss": 1.4089, "step": 22458 }, { "epoch": 0.8043046179741078, "grad_norm": 1.622973918914795, "learning_rate": 1.941993439209546e-05, "loss": 1.3107, "step": 22459 }, { "epoch": 0.8043404301036761, "grad_norm": 1.6063894033432007, "learning_rate": 1.9413066150862113e-05, "loss": 1.2024, "step": 22460 }, { "epoch": 0.8043762422332444, "grad_norm": 1.5905756950378418, "learning_rate": 1.9406198993822e-05, "loss": 1.5908, "step": 22461 }, { "epoch": 0.8044120543628127, "grad_norm": 1.9238442182540894, "learning_rate": 1.9399332921067537e-05, "loss": 2.0081, "step": 22462 }, { "epoch": 0.804447866492381, "grad_norm": 1.5469660758972168, "learning_rate": 1.939246793269103e-05, "loss": 1.6734, "step": 22463 }, { "epoch": 0.8044836786219492, "grad_norm": 2.7361080646514893, "learning_rate": 1.938560402878494e-05, "loss": 1.7897, "step": 22464 }, { "epoch": 0.8045194907515175, "grad_norm": 1.9339004755020142, "learning_rate": 1.9378741209441565e-05, "loss": 1.3695, "step": 22465 }, { "epoch": 0.8045553028810858, "grad_norm": 1.4602885246276855, "learning_rate": 1.9371879474753208e-05, "loss": 1.5427, "step": 22466 }, { "epoch": 0.8045911150106541, "grad_norm": 1.5360411405563354, "learning_rate": 1.9365018824812208e-05, "loss": 1.4808, "step": 22467 }, { "epoch": 0.8046269271402224, "grad_norm": 1.5212199687957764, "learning_rate": 1.9358159259710874e-05, "loss": 1.1769, "step": 22468 }, { "epoch": 0.8046627392697907, "grad_norm": 2.0291521549224854, "learning_rate": 1.9351300779541503e-05, "loss": 1.5694, "step": 22469 }, { "epoch": 0.804698551399359, "grad_norm": 1.705421805381775, "learning_rate": 1.9344443384396337e-05, "loss": 1.6615, "step": 22470 }, { "epoch": 0.8047343635289272, "grad_norm": 2.076650381088257, "learning_rate": 1.9337587074367637e-05, "loss": 1.5295, "step": 22471 }, { "epoch": 0.8047701756584955, "grad_norm": 1.3150924444198608, "learning_rate": 1.9330731849547655e-05, "loss": 1.019, "step": 22472 }, { "epoch": 0.8048059877880638, "grad_norm": 1.6172802448272705, "learning_rate": 1.9323877710028658e-05, "loss": 1.4884, "step": 22473 }, { "epoch": 0.804841799917632, "grad_norm": 1.3734067678451538, "learning_rate": 1.9317024655902782e-05, "loss": 1.3739, "step": 22474 }, { "epoch": 0.8048776120472004, "grad_norm": 1.7881556749343872, "learning_rate": 1.9310172687262273e-05, "loss": 1.3257, "step": 22475 }, { "epoch": 0.8049134241767687, "grad_norm": 1.5242328643798828, "learning_rate": 1.9303321804199338e-05, "loss": 1.2506, "step": 22476 }, { "epoch": 0.804949236306337, "grad_norm": 1.535390019416809, "learning_rate": 1.9296472006806087e-05, "loss": 0.9729, "step": 22477 }, { "epoch": 0.8049850484359052, "grad_norm": 1.8442243337631226, "learning_rate": 1.9289623295174697e-05, "loss": 1.6024, "step": 22478 }, { "epoch": 0.8050208605654735, "grad_norm": 1.8470580577850342, "learning_rate": 1.9282775669397324e-05, "loss": 1.6681, "step": 22479 }, { "epoch": 0.8050566726950418, "grad_norm": 1.2782084941864014, "learning_rate": 1.9275929129566116e-05, "loss": 1.4994, "step": 22480 }, { "epoch": 0.80509248482461, "grad_norm": 1.826900839805603, "learning_rate": 1.9269083675773126e-05, "loss": 1.5691, "step": 22481 }, { "epoch": 0.8051282969541784, "grad_norm": 1.380555510520935, "learning_rate": 1.9262239308110474e-05, "loss": 1.4017, "step": 22482 }, { "epoch": 0.8051641090837467, "grad_norm": 1.2143405675888062, "learning_rate": 1.925539602667028e-05, "loss": 1.4549, "step": 22483 }, { "epoch": 0.805199921213315, "grad_norm": 1.643473505973816, "learning_rate": 1.924855383154456e-05, "loss": 1.7101, "step": 22484 }, { "epoch": 0.8052357333428832, "grad_norm": 1.6362136602401733, "learning_rate": 1.924171272282538e-05, "loss": 1.5971, "step": 22485 }, { "epoch": 0.8052715454724515, "grad_norm": 1.5110559463500977, "learning_rate": 1.9234872700604777e-05, "loss": 1.3248, "step": 22486 }, { "epoch": 0.8053073576020198, "grad_norm": 1.5972952842712402, "learning_rate": 1.9228033764974818e-05, "loss": 1.3044, "step": 22487 }, { "epoch": 0.805343169731588, "grad_norm": 1.8326233625411987, "learning_rate": 1.9221195916027445e-05, "loss": 1.611, "step": 22488 }, { "epoch": 0.8053789818611564, "grad_norm": 2.9135138988494873, "learning_rate": 1.921435915385469e-05, "loss": 1.9834, "step": 22489 }, { "epoch": 0.8054147939907247, "grad_norm": 1.453930139541626, "learning_rate": 1.9207523478548518e-05, "loss": 1.3931, "step": 22490 }, { "epoch": 0.805450606120293, "grad_norm": 1.5433127880096436, "learning_rate": 1.9200688890200936e-05, "loss": 1.0553, "step": 22491 }, { "epoch": 0.8054864182498612, "grad_norm": 1.3644789457321167, "learning_rate": 1.9193855388903824e-05, "loss": 1.187, "step": 22492 }, { "epoch": 0.8055222303794295, "grad_norm": 1.4411420822143555, "learning_rate": 1.918702297474917e-05, "loss": 1.4759, "step": 22493 }, { "epoch": 0.8055580425089978, "grad_norm": 1.6154589653015137, "learning_rate": 1.9180191647828906e-05, "loss": 1.3965, "step": 22494 }, { "epoch": 0.805593854638566, "grad_norm": 1.5406982898712158, "learning_rate": 1.917336140823488e-05, "loss": 1.3631, "step": 22495 }, { "epoch": 0.8056296667681344, "grad_norm": 1.679787278175354, "learning_rate": 1.916653225605901e-05, "loss": 1.577, "step": 22496 }, { "epoch": 0.8056654788977027, "grad_norm": 1.5739904642105103, "learning_rate": 1.915970419139319e-05, "loss": 1.641, "step": 22497 }, { "epoch": 0.805701291027271, "grad_norm": 1.5268478393554688, "learning_rate": 1.91528772143293e-05, "loss": 1.0907, "step": 22498 }, { "epoch": 0.8057371031568392, "grad_norm": 1.8088730573654175, "learning_rate": 1.9146051324959134e-05, "loss": 1.4507, "step": 22499 }, { "epoch": 0.8057729152864075, "grad_norm": 1.8089536428451538, "learning_rate": 1.9139226523374566e-05, "loss": 1.4117, "step": 22500 }, { "epoch": 0.8058087274159758, "grad_norm": 1.642994999885559, "learning_rate": 1.9132402809667416e-05, "loss": 1.4066, "step": 22501 }, { "epoch": 0.805844539545544, "grad_norm": 2.0810434818267822, "learning_rate": 1.9125580183929448e-05, "loss": 1.5808, "step": 22502 }, { "epoch": 0.8058803516751124, "grad_norm": 1.3621398210525513, "learning_rate": 1.9118758646252477e-05, "loss": 1.1892, "step": 22503 }, { "epoch": 0.8059161638046807, "grad_norm": 1.652403473854065, "learning_rate": 1.9111938196728284e-05, "loss": 1.3299, "step": 22504 }, { "epoch": 0.8059519759342489, "grad_norm": 1.8718891143798828, "learning_rate": 1.9105118835448644e-05, "loss": 1.6525, "step": 22505 }, { "epoch": 0.8059877880638172, "grad_norm": 1.4959759712219238, "learning_rate": 1.9098300562505266e-05, "loss": 1.2341, "step": 22506 }, { "epoch": 0.8060236001933855, "grad_norm": 1.535392165184021, "learning_rate": 1.9091483377989895e-05, "loss": 1.4926, "step": 22507 }, { "epoch": 0.8060594123229537, "grad_norm": 2.2390332221984863, "learning_rate": 1.9084667281994273e-05, "loss": 1.5246, "step": 22508 }, { "epoch": 0.806095224452522, "grad_norm": 1.3575999736785889, "learning_rate": 1.9077852274610055e-05, "loss": 1.6471, "step": 22509 }, { "epoch": 0.8061310365820904, "grad_norm": 2.580101251602173, "learning_rate": 1.9071038355928948e-05, "loss": 1.5999, "step": 22510 }, { "epoch": 0.8061668487116587, "grad_norm": 1.4733290672302246, "learning_rate": 1.9064225526042644e-05, "loss": 1.2515, "step": 22511 }, { "epoch": 0.8062026608412269, "grad_norm": 1.3597815036773682, "learning_rate": 1.90574137850428e-05, "loss": 1.4526, "step": 22512 }, { "epoch": 0.8062384729707952, "grad_norm": 1.9659523963928223, "learning_rate": 1.9050603133021017e-05, "loss": 1.6421, "step": 22513 }, { "epoch": 0.8062742851003635, "grad_norm": 1.9482402801513672, "learning_rate": 1.904379357006896e-05, "loss": 1.3654, "step": 22514 }, { "epoch": 0.8063100972299317, "grad_norm": 1.8046506643295288, "learning_rate": 1.9036985096278227e-05, "loss": 1.6794, "step": 22515 }, { "epoch": 0.8063459093595, "grad_norm": 2.277416944503784, "learning_rate": 1.903017771174046e-05, "loss": 1.6218, "step": 22516 }, { "epoch": 0.8063817214890684, "grad_norm": 1.3956186771392822, "learning_rate": 1.9023371416547177e-05, "loss": 1.5363, "step": 22517 }, { "epoch": 0.8064175336186367, "grad_norm": 1.5622445344924927, "learning_rate": 1.9016566210789977e-05, "loss": 1.4348, "step": 22518 }, { "epoch": 0.8064533457482049, "grad_norm": 1.2763302326202393, "learning_rate": 1.9009762094560446e-05, "loss": 1.5905, "step": 22519 }, { "epoch": 0.8064891578777732, "grad_norm": 1.6055020093917847, "learning_rate": 1.9002959067950066e-05, "loss": 1.5085, "step": 22520 }, { "epoch": 0.8065249700073415, "grad_norm": 1.4015109539031982, "learning_rate": 1.8996157131050395e-05, "loss": 1.4095, "step": 22521 }, { "epoch": 0.8065607821369097, "grad_norm": 2.2692770957946777, "learning_rate": 1.8989356283952943e-05, "loss": 1.556, "step": 22522 }, { "epoch": 0.806596594266478, "grad_norm": 1.5125993490219116, "learning_rate": 1.898255652674924e-05, "loss": 1.4581, "step": 22523 }, { "epoch": 0.8066324063960464, "grad_norm": 1.379358172416687, "learning_rate": 1.8975757859530696e-05, "loss": 1.5579, "step": 22524 }, { "epoch": 0.8066682185256147, "grad_norm": 1.793256163597107, "learning_rate": 1.8968960282388826e-05, "loss": 1.6786, "step": 22525 }, { "epoch": 0.8067040306551829, "grad_norm": 2.028078317642212, "learning_rate": 1.896216379541509e-05, "loss": 1.3897, "step": 22526 }, { "epoch": 0.8067398427847512, "grad_norm": 1.6242742538452148, "learning_rate": 1.895536839870089e-05, "loss": 1.2961, "step": 22527 }, { "epoch": 0.8067756549143195, "grad_norm": 1.5608689785003662, "learning_rate": 1.894857409233769e-05, "loss": 1.0402, "step": 22528 }, { "epoch": 0.8068114670438877, "grad_norm": 1.6409008502960205, "learning_rate": 1.8941780876416826e-05, "loss": 1.4753, "step": 22529 }, { "epoch": 0.806847279173456, "grad_norm": 1.9912757873535156, "learning_rate": 1.893498875102979e-05, "loss": 1.7401, "step": 22530 }, { "epoch": 0.8068830913030244, "grad_norm": 1.8302267789840698, "learning_rate": 1.8928197716267894e-05, "loss": 1.5594, "step": 22531 }, { "epoch": 0.8069189034325926, "grad_norm": 1.399596929550171, "learning_rate": 1.892140777222252e-05, "loss": 1.4571, "step": 22532 }, { "epoch": 0.8069547155621609, "grad_norm": 1.794324278831482, "learning_rate": 1.8914618918985028e-05, "loss": 1.2283, "step": 22533 }, { "epoch": 0.8069905276917292, "grad_norm": 1.7145918607711792, "learning_rate": 1.890783115664676e-05, "loss": 1.2539, "step": 22534 }, { "epoch": 0.8070263398212975, "grad_norm": 1.4848459959030151, "learning_rate": 1.8901044485299034e-05, "loss": 1.41, "step": 22535 }, { "epoch": 0.8070621519508657, "grad_norm": 1.7278633117675781, "learning_rate": 1.889425890503308e-05, "loss": 1.5744, "step": 22536 }, { "epoch": 0.807097964080434, "grad_norm": 1.4958399534225464, "learning_rate": 1.888747441594031e-05, "loss": 1.3393, "step": 22537 }, { "epoch": 0.8071337762100024, "grad_norm": 1.7114999294281006, "learning_rate": 1.888069101811193e-05, "loss": 1.1309, "step": 22538 }, { "epoch": 0.8071695883395706, "grad_norm": 1.3507667779922485, "learning_rate": 1.887390871163922e-05, "loss": 1.3367, "step": 22539 }, { "epoch": 0.8072054004691389, "grad_norm": 2.3738009929656982, "learning_rate": 1.886712749661339e-05, "loss": 1.5551, "step": 22540 }, { "epoch": 0.8072412125987072, "grad_norm": 1.762641191482544, "learning_rate": 1.8860347373125753e-05, "loss": 1.4421, "step": 22541 }, { "epoch": 0.8072770247282754, "grad_norm": 1.4975340366363525, "learning_rate": 1.8853568341267448e-05, "loss": 1.3572, "step": 22542 }, { "epoch": 0.8073128368578437, "grad_norm": 1.7372536659240723, "learning_rate": 1.884679040112971e-05, "loss": 1.1709, "step": 22543 }, { "epoch": 0.807348648987412, "grad_norm": 2.144090175628662, "learning_rate": 1.884001355280376e-05, "loss": 1.6421, "step": 22544 }, { "epoch": 0.8073844611169804, "grad_norm": 1.7356644868850708, "learning_rate": 1.8833237796380708e-05, "loss": 1.3723, "step": 22545 }, { "epoch": 0.8074202732465486, "grad_norm": 1.4547863006591797, "learning_rate": 1.8826463131951767e-05, "loss": 1.3705, "step": 22546 }, { "epoch": 0.8074560853761169, "grad_norm": 1.4055238962173462, "learning_rate": 1.8819689559608012e-05, "loss": 1.2637, "step": 22547 }, { "epoch": 0.8074918975056852, "grad_norm": 1.7052263021469116, "learning_rate": 1.8812917079440673e-05, "loss": 1.4931, "step": 22548 }, { "epoch": 0.8075277096352534, "grad_norm": 1.6998118162155151, "learning_rate": 1.8806145691540777e-05, "loss": 1.7741, "step": 22549 }, { "epoch": 0.8075635217648217, "grad_norm": 1.4957939386367798, "learning_rate": 1.8799375395999487e-05, "loss": 1.4767, "step": 22550 }, { "epoch": 0.80759933389439, "grad_norm": 1.5346788167953491, "learning_rate": 1.8792606192907813e-05, "loss": 1.3791, "step": 22551 }, { "epoch": 0.8076351460239584, "grad_norm": 1.6711095571517944, "learning_rate": 1.878583808235692e-05, "loss": 1.3828, "step": 22552 }, { "epoch": 0.8076709581535266, "grad_norm": 2.229304313659668, "learning_rate": 1.8779071064437813e-05, "loss": 1.8454, "step": 22553 }, { "epoch": 0.8077067702830949, "grad_norm": 1.6836011409759521, "learning_rate": 1.877230513924152e-05, "loss": 1.14, "step": 22554 }, { "epoch": 0.8077425824126632, "grad_norm": 1.786728858947754, "learning_rate": 1.8765540306859076e-05, "loss": 1.4788, "step": 22555 }, { "epoch": 0.8077783945422314, "grad_norm": 3.058734178543091, "learning_rate": 1.8758776567381508e-05, "loss": 1.3963, "step": 22556 }, { "epoch": 0.8078142066717997, "grad_norm": 1.7298702001571655, "learning_rate": 1.8752013920899836e-05, "loss": 1.3446, "step": 22557 }, { "epoch": 0.807850018801368, "grad_norm": 1.8953697681427002, "learning_rate": 1.874525236750495e-05, "loss": 1.6026, "step": 22558 }, { "epoch": 0.8078858309309364, "grad_norm": 1.685193657875061, "learning_rate": 1.8738491907287946e-05, "loss": 1.5745, "step": 22559 }, { "epoch": 0.8079216430605046, "grad_norm": 1.7449640035629272, "learning_rate": 1.8731732540339684e-05, "loss": 1.4257, "step": 22560 }, { "epoch": 0.8079574551900729, "grad_norm": 1.563020944595337, "learning_rate": 1.872497426675116e-05, "loss": 1.2951, "step": 22561 }, { "epoch": 0.8079932673196412, "grad_norm": 1.595478892326355, "learning_rate": 1.8718217086613242e-05, "loss": 1.1794, "step": 22562 }, { "epoch": 0.8080290794492094, "grad_norm": 1.6257182359695435, "learning_rate": 1.871146100001687e-05, "loss": 1.3228, "step": 22563 }, { "epoch": 0.8080648915787777, "grad_norm": 1.8950823545455933, "learning_rate": 1.8704706007052963e-05, "loss": 1.4481, "step": 22564 }, { "epoch": 0.808100703708346, "grad_norm": 1.5394660234451294, "learning_rate": 1.8697952107812344e-05, "loss": 1.249, "step": 22565 }, { "epoch": 0.8081365158379143, "grad_norm": 1.2871990203857422, "learning_rate": 1.86911993023859e-05, "loss": 1.3532, "step": 22566 }, { "epoch": 0.8081723279674826, "grad_norm": 1.3102869987487793, "learning_rate": 1.8684447590864494e-05, "loss": 1.4537, "step": 22567 }, { "epoch": 0.8082081400970509, "grad_norm": 1.5285694599151611, "learning_rate": 1.867769697333899e-05, "loss": 1.2677, "step": 22568 }, { "epoch": 0.8082439522266192, "grad_norm": 1.455868124961853, "learning_rate": 1.8670947449900113e-05, "loss": 1.2461, "step": 22569 }, { "epoch": 0.8082797643561874, "grad_norm": 1.5081433057785034, "learning_rate": 1.8664199020638785e-05, "loss": 1.6674, "step": 22570 }, { "epoch": 0.8083155764857557, "grad_norm": 1.4699350595474243, "learning_rate": 1.8657451685645756e-05, "loss": 1.5963, "step": 22571 }, { "epoch": 0.808351388615324, "grad_norm": 1.518787145614624, "learning_rate": 1.8650705445011752e-05, "loss": 1.1539, "step": 22572 }, { "epoch": 0.8083872007448923, "grad_norm": 1.761563777923584, "learning_rate": 1.8643960298827566e-05, "loss": 1.2219, "step": 22573 }, { "epoch": 0.8084230128744606, "grad_norm": 1.9616132974624634, "learning_rate": 1.8637216247183966e-05, "loss": 1.5145, "step": 22574 }, { "epoch": 0.8084588250040289, "grad_norm": 1.8252731561660767, "learning_rate": 1.8630473290171692e-05, "loss": 1.6176, "step": 22575 }, { "epoch": 0.8084946371335971, "grad_norm": 1.8796595335006714, "learning_rate": 1.8623731427881418e-05, "loss": 1.5545, "step": 22576 }, { "epoch": 0.8085304492631654, "grad_norm": 1.679461121559143, "learning_rate": 1.8616990660403865e-05, "loss": 1.4918, "step": 22577 }, { "epoch": 0.8085662613927337, "grad_norm": 1.4722779989242554, "learning_rate": 1.8610250987829725e-05, "loss": 1.3942, "step": 22578 }, { "epoch": 0.808602073522302, "grad_norm": 1.8476933240890503, "learning_rate": 1.86035124102497e-05, "loss": 1.3052, "step": 22579 }, { "epoch": 0.8086378856518703, "grad_norm": 1.712782621383667, "learning_rate": 1.85967749277544e-05, "loss": 1.0278, "step": 22580 }, { "epoch": 0.8086736977814386, "grad_norm": 1.7284092903137207, "learning_rate": 1.8590038540434485e-05, "loss": 1.5429, "step": 22581 }, { "epoch": 0.8087095099110069, "grad_norm": 1.3968522548675537, "learning_rate": 1.8583303248380625e-05, "loss": 1.2947, "step": 22582 }, { "epoch": 0.8087453220405751, "grad_norm": 1.7425910234451294, "learning_rate": 1.8576569051683368e-05, "loss": 1.552, "step": 22583 }, { "epoch": 0.8087811341701434, "grad_norm": 1.8411518335342407, "learning_rate": 1.8569835950433344e-05, "loss": 1.7172, "step": 22584 }, { "epoch": 0.8088169462997117, "grad_norm": 1.879120945930481, "learning_rate": 1.856310394472114e-05, "loss": 1.374, "step": 22585 }, { "epoch": 0.80885275842928, "grad_norm": 1.4328593015670776, "learning_rate": 1.8556373034637353e-05, "loss": 1.5658, "step": 22586 }, { "epoch": 0.8088885705588483, "grad_norm": 1.5288828611373901, "learning_rate": 1.8549643220272494e-05, "loss": 1.4516, "step": 22587 }, { "epoch": 0.8089243826884166, "grad_norm": 2.343038558959961, "learning_rate": 1.8542914501717113e-05, "loss": 1.4335, "step": 22588 }, { "epoch": 0.8089601948179849, "grad_norm": 1.6488909721374512, "learning_rate": 1.853618687906177e-05, "loss": 0.9991, "step": 22589 }, { "epoch": 0.8089960069475531, "grad_norm": 1.3302148580551147, "learning_rate": 1.852946035239693e-05, "loss": 1.339, "step": 22590 }, { "epoch": 0.8090318190771214, "grad_norm": 1.6579506397247314, "learning_rate": 1.8522734921813113e-05, "loss": 1.4514, "step": 22591 }, { "epoch": 0.8090676312066897, "grad_norm": 1.6817675828933716, "learning_rate": 1.85160105874008e-05, "loss": 1.1681, "step": 22592 }, { "epoch": 0.8091034433362579, "grad_norm": 1.8572685718536377, "learning_rate": 1.8509287349250482e-05, "loss": 1.456, "step": 22593 }, { "epoch": 0.8091392554658263, "grad_norm": 1.4762178659439087, "learning_rate": 1.850256520745256e-05, "loss": 1.4556, "step": 22594 }, { "epoch": 0.8091750675953946, "grad_norm": 1.8500964641571045, "learning_rate": 1.84958441620975e-05, "loss": 1.5579, "step": 22595 }, { "epoch": 0.8092108797249629, "grad_norm": 1.5180333852767944, "learning_rate": 1.8489124213275745e-05, "loss": 1.34, "step": 22596 }, { "epoch": 0.8092466918545311, "grad_norm": 2.089555501937866, "learning_rate": 1.8482405361077658e-05, "loss": 1.6457, "step": 22597 }, { "epoch": 0.8092825039840994, "grad_norm": 1.4597141742706299, "learning_rate": 1.847568760559366e-05, "loss": 1.3754, "step": 22598 }, { "epoch": 0.8093183161136677, "grad_norm": 1.6133396625518799, "learning_rate": 1.8468970946914134e-05, "loss": 1.1957, "step": 22599 }, { "epoch": 0.8093541282432359, "grad_norm": 1.5091086626052856, "learning_rate": 1.8462255385129447e-05, "loss": 1.3334, "step": 22600 }, { "epoch": 0.8093899403728043, "grad_norm": 1.8339647054672241, "learning_rate": 1.8455540920329916e-05, "loss": 1.7524, "step": 22601 }, { "epoch": 0.8094257525023726, "grad_norm": 2.3774209022521973, "learning_rate": 1.8448827552605907e-05, "loss": 1.4291, "step": 22602 }, { "epoch": 0.8094615646319409, "grad_norm": 1.3259594440460205, "learning_rate": 1.8442115282047723e-05, "loss": 1.5422, "step": 22603 }, { "epoch": 0.8094973767615091, "grad_norm": 1.5684672594070435, "learning_rate": 1.8435404108745702e-05, "loss": 1.4201, "step": 22604 }, { "epoch": 0.8095331888910774, "grad_norm": 1.6213066577911377, "learning_rate": 1.8428694032790074e-05, "loss": 1.1581, "step": 22605 }, { "epoch": 0.8095690010206457, "grad_norm": 1.6540571451187134, "learning_rate": 1.8421985054271163e-05, "loss": 1.319, "step": 22606 }, { "epoch": 0.8096048131502139, "grad_norm": 2.1115972995758057, "learning_rate": 1.8415277173279234e-05, "loss": 1.3976, "step": 22607 }, { "epoch": 0.8096406252797823, "grad_norm": 1.4200903177261353, "learning_rate": 1.840857038990449e-05, "loss": 1.2876, "step": 22608 }, { "epoch": 0.8096764374093506, "grad_norm": 2.16260027885437, "learning_rate": 1.840186470423718e-05, "loss": 1.3341, "step": 22609 }, { "epoch": 0.8097122495389188, "grad_norm": 1.4035898447036743, "learning_rate": 1.8395160116367528e-05, "loss": 1.276, "step": 22610 }, { "epoch": 0.8097480616684871, "grad_norm": 2.398355484008789, "learning_rate": 1.8388456626385765e-05, "loss": 1.5374, "step": 22611 }, { "epoch": 0.8097838737980554, "grad_norm": 1.7936995029449463, "learning_rate": 1.838175423438202e-05, "loss": 1.4219, "step": 22612 }, { "epoch": 0.8098196859276237, "grad_norm": 1.4915958642959595, "learning_rate": 1.837505294044649e-05, "loss": 1.4166, "step": 22613 }, { "epoch": 0.8098554980571919, "grad_norm": 1.7961300611495972, "learning_rate": 1.836835274466936e-05, "loss": 1.4323, "step": 22614 }, { "epoch": 0.8098913101867603, "grad_norm": 2.53918719291687, "learning_rate": 1.836165364714072e-05, "loss": 1.3317, "step": 22615 }, { "epoch": 0.8099271223163286, "grad_norm": 1.416369080543518, "learning_rate": 1.835495564795072e-05, "loss": 1.3952, "step": 22616 }, { "epoch": 0.8099629344458968, "grad_norm": 2.098452091217041, "learning_rate": 1.8348258747189484e-05, "loss": 1.4207, "step": 22617 }, { "epoch": 0.8099987465754651, "grad_norm": 1.722528100013733, "learning_rate": 1.8341562944947134e-05, "loss": 1.4759, "step": 22618 }, { "epoch": 0.8100345587050334, "grad_norm": 1.612025499343872, "learning_rate": 1.8334868241313685e-05, "loss": 1.5993, "step": 22619 }, { "epoch": 0.8100703708346016, "grad_norm": 1.5949660539627075, "learning_rate": 1.832817463637925e-05, "loss": 1.4045, "step": 22620 }, { "epoch": 0.8101061829641699, "grad_norm": 1.33736252784729, "learning_rate": 1.832148213023387e-05, "loss": 1.2006, "step": 22621 }, { "epoch": 0.8101419950937383, "grad_norm": 2.1266708374023438, "learning_rate": 1.8314790722967624e-05, "loss": 1.522, "step": 22622 }, { "epoch": 0.8101778072233066, "grad_norm": 1.595590353012085, "learning_rate": 1.8308100414670504e-05, "loss": 1.4679, "step": 22623 }, { "epoch": 0.8102136193528748, "grad_norm": 1.3091840744018555, "learning_rate": 1.830141120543246e-05, "loss": 1.2643, "step": 22624 }, { "epoch": 0.8102494314824431, "grad_norm": 1.8592644929885864, "learning_rate": 1.829472309534359e-05, "loss": 1.4972, "step": 22625 }, { "epoch": 0.8102852436120114, "grad_norm": 1.5312573909759521, "learning_rate": 1.828803608449382e-05, "loss": 1.4416, "step": 22626 }, { "epoch": 0.8103210557415796, "grad_norm": 1.3308011293411255, "learning_rate": 1.828135017297311e-05, "loss": 1.3562, "step": 22627 }, { "epoch": 0.8103568678711479, "grad_norm": 1.6629422903060913, "learning_rate": 1.8274665360871425e-05, "loss": 1.2463, "step": 22628 }, { "epoch": 0.8103926800007163, "grad_norm": 2.118683099746704, "learning_rate": 1.8267981648278733e-05, "loss": 1.5011, "step": 22629 }, { "epoch": 0.8104284921302846, "grad_norm": 1.60123872756958, "learning_rate": 1.8261299035284883e-05, "loss": 1.4359, "step": 22630 }, { "epoch": 0.8104643042598528, "grad_norm": 1.698397159576416, "learning_rate": 1.825461752197983e-05, "loss": 1.4867, "step": 22631 }, { "epoch": 0.8105001163894211, "grad_norm": 1.6148022413253784, "learning_rate": 1.8247937108453482e-05, "loss": 1.4307, "step": 22632 }, { "epoch": 0.8105359285189894, "grad_norm": 1.858364224433899, "learning_rate": 1.8241257794795653e-05, "loss": 1.3802, "step": 22633 }, { "epoch": 0.8105717406485576, "grad_norm": 1.6542712450027466, "learning_rate": 1.8234579581096266e-05, "loss": 1.419, "step": 22634 }, { "epoch": 0.8106075527781259, "grad_norm": 1.4909992218017578, "learning_rate": 1.82279024674451e-05, "loss": 1.3149, "step": 22635 }, { "epoch": 0.8106433649076943, "grad_norm": 1.499837040901184, "learning_rate": 1.8221226453932074e-05, "loss": 1.3224, "step": 22636 }, { "epoch": 0.8106791770372626, "grad_norm": 2.104693651199341, "learning_rate": 1.821455154064693e-05, "loss": 1.3378, "step": 22637 }, { "epoch": 0.8107149891668308, "grad_norm": 2.1410248279571533, "learning_rate": 1.8207877727679523e-05, "loss": 1.29, "step": 22638 }, { "epoch": 0.8107508012963991, "grad_norm": 1.8281619548797607, "learning_rate": 1.820120501511957e-05, "loss": 1.3479, "step": 22639 }, { "epoch": 0.8107866134259674, "grad_norm": 1.728197693824768, "learning_rate": 1.8194533403056935e-05, "loss": 1.5324, "step": 22640 }, { "epoch": 0.8108224255555356, "grad_norm": 1.576294183731079, "learning_rate": 1.8187862891581343e-05, "loss": 1.4459, "step": 22641 }, { "epoch": 0.8108582376851039, "grad_norm": 1.467061996459961, "learning_rate": 1.8181193480782466e-05, "loss": 1.1734, "step": 22642 }, { "epoch": 0.8108940498146723, "grad_norm": 1.5236502885818481, "learning_rate": 1.8174525170750145e-05, "loss": 1.0675, "step": 22643 }, { "epoch": 0.8109298619442405, "grad_norm": 1.7928649187088013, "learning_rate": 1.816785796157402e-05, "loss": 1.6023, "step": 22644 }, { "epoch": 0.8109656740738088, "grad_norm": 2.2303380966186523, "learning_rate": 1.8161191853343827e-05, "loss": 1.3182, "step": 22645 }, { "epoch": 0.8110014862033771, "grad_norm": 1.2965222597122192, "learning_rate": 1.815452684614919e-05, "loss": 1.5056, "step": 22646 }, { "epoch": 0.8110372983329454, "grad_norm": 2.0349364280700684, "learning_rate": 1.8147862940079875e-05, "loss": 1.7971, "step": 22647 }, { "epoch": 0.8110731104625136, "grad_norm": 1.655045509338379, "learning_rate": 1.8141200135225444e-05, "loss": 1.6047, "step": 22648 }, { "epoch": 0.8111089225920819, "grad_norm": 1.3515609502792358, "learning_rate": 1.8134538431675608e-05, "loss": 1.519, "step": 22649 }, { "epoch": 0.8111447347216503, "grad_norm": 1.5632681846618652, "learning_rate": 1.8127877829519935e-05, "loss": 1.5632, "step": 22650 }, { "epoch": 0.8111805468512185, "grad_norm": 1.6890665292739868, "learning_rate": 1.8121218328848054e-05, "loss": 1.2321, "step": 22651 }, { "epoch": 0.8112163589807868, "grad_norm": 1.3986566066741943, "learning_rate": 1.8114559929749586e-05, "loss": 1.0644, "step": 22652 }, { "epoch": 0.8112521711103551, "grad_norm": 2.0749661922454834, "learning_rate": 1.8107902632314044e-05, "loss": 1.2105, "step": 22653 }, { "epoch": 0.8112879832399233, "grad_norm": 1.7615954875946045, "learning_rate": 1.8101246436631093e-05, "loss": 1.3472, "step": 22654 }, { "epoch": 0.8113237953694916, "grad_norm": 1.7018330097198486, "learning_rate": 1.8094591342790202e-05, "loss": 1.5256, "step": 22655 }, { "epoch": 0.8113596074990599, "grad_norm": 1.8836838006973267, "learning_rate": 1.8087937350880957e-05, "loss": 1.4031, "step": 22656 }, { "epoch": 0.8113954196286283, "grad_norm": 1.560875415802002, "learning_rate": 1.8081284460992808e-05, "loss": 1.2982, "step": 22657 }, { "epoch": 0.8114312317581965, "grad_norm": 1.307560920715332, "learning_rate": 1.8074632673215365e-05, "loss": 1.4884, "step": 22658 }, { "epoch": 0.8114670438877648, "grad_norm": 2.2579801082611084, "learning_rate": 1.806798198763805e-05, "loss": 1.6971, "step": 22659 }, { "epoch": 0.8115028560173331, "grad_norm": 1.286411166191101, "learning_rate": 1.806133240435034e-05, "loss": 1.2505, "step": 22660 }, { "epoch": 0.8115386681469013, "grad_norm": 1.7834134101867676, "learning_rate": 1.8054683923441694e-05, "loss": 1.4802, "step": 22661 }, { "epoch": 0.8115744802764696, "grad_norm": 1.6677281856536865, "learning_rate": 1.804803654500159e-05, "loss": 1.3463, "step": 22662 }, { "epoch": 0.8116102924060379, "grad_norm": 1.974789023399353, "learning_rate": 1.8041390269119463e-05, "loss": 1.7445, "step": 22663 }, { "epoch": 0.8116461045356063, "grad_norm": 2.2561769485473633, "learning_rate": 1.8034745095884687e-05, "loss": 1.4882, "step": 22664 }, { "epoch": 0.8116819166651745, "grad_norm": 1.5701444149017334, "learning_rate": 1.802810102538668e-05, "loss": 1.6699, "step": 22665 }, { "epoch": 0.8117177287947428, "grad_norm": 2.0393314361572266, "learning_rate": 1.8021458057714845e-05, "loss": 1.4406, "step": 22666 }, { "epoch": 0.8117535409243111, "grad_norm": 2.785156488418579, "learning_rate": 1.8014816192958574e-05, "loss": 1.2964, "step": 22667 }, { "epoch": 0.8117893530538793, "grad_norm": 1.4221245050430298, "learning_rate": 1.8008175431207173e-05, "loss": 1.4925, "step": 22668 }, { "epoch": 0.8118251651834476, "grad_norm": 1.7360721826553345, "learning_rate": 1.8001535772550006e-05, "loss": 1.3764, "step": 22669 }, { "epoch": 0.8118609773130159, "grad_norm": 1.4904028177261353, "learning_rate": 1.7994897217076423e-05, "loss": 1.1439, "step": 22670 }, { "epoch": 0.8118967894425843, "grad_norm": 1.3772350549697876, "learning_rate": 1.7988259764875705e-05, "loss": 1.4313, "step": 22671 }, { "epoch": 0.8119326015721525, "grad_norm": 3.0207619667053223, "learning_rate": 1.7981623416037163e-05, "loss": 1.1208, "step": 22672 }, { "epoch": 0.8119684137017208, "grad_norm": 1.3616470098495483, "learning_rate": 1.7974988170650075e-05, "loss": 1.4309, "step": 22673 }, { "epoch": 0.8120042258312891, "grad_norm": 1.674111247062683, "learning_rate": 1.7968354028803748e-05, "loss": 1.4841, "step": 22674 }, { "epoch": 0.8120400379608573, "grad_norm": 1.7297577857971191, "learning_rate": 1.7961720990587382e-05, "loss": 1.0489, "step": 22675 }, { "epoch": 0.8120758500904256, "grad_norm": 1.3222150802612305, "learning_rate": 1.7955089056090234e-05, "loss": 1.5225, "step": 22676 }, { "epoch": 0.8121116622199939, "grad_norm": 1.6286765336990356, "learning_rate": 1.7948458225401553e-05, "loss": 1.7537, "step": 22677 }, { "epoch": 0.8121474743495622, "grad_norm": 1.5624029636383057, "learning_rate": 1.7941828498610503e-05, "loss": 1.5025, "step": 22678 }, { "epoch": 0.8121832864791305, "grad_norm": 1.589890956878662, "learning_rate": 1.793519987580631e-05, "loss": 1.1414, "step": 22679 }, { "epoch": 0.8122190986086988, "grad_norm": 2.1349070072174072, "learning_rate": 1.7928572357078143e-05, "loss": 1.079, "step": 22680 }, { "epoch": 0.812254910738267, "grad_norm": 1.6790281534194946, "learning_rate": 1.7921945942515195e-05, "loss": 1.5152, "step": 22681 }, { "epoch": 0.8122907228678353, "grad_norm": 1.7030316591262817, "learning_rate": 1.7915320632206566e-05, "loss": 1.0342, "step": 22682 }, { "epoch": 0.8123265349974036, "grad_norm": 1.1964973211288452, "learning_rate": 1.7908696426241422e-05, "loss": 1.2793, "step": 22683 }, { "epoch": 0.8123623471269719, "grad_norm": 1.6645698547363281, "learning_rate": 1.790207332470887e-05, "loss": 1.5247, "step": 22684 }, { "epoch": 0.8123981592565402, "grad_norm": 2.757772207260132, "learning_rate": 1.7895451327698054e-05, "loss": 1.5142, "step": 22685 }, { "epoch": 0.8124339713861085, "grad_norm": 1.540874719619751, "learning_rate": 1.788883043529801e-05, "loss": 1.4304, "step": 22686 }, { "epoch": 0.8124697835156768, "grad_norm": 1.5452402830123901, "learning_rate": 1.788221064759783e-05, "loss": 1.5054, "step": 22687 }, { "epoch": 0.812505595645245, "grad_norm": 1.5464129447937012, "learning_rate": 1.78755919646866e-05, "loss": 1.294, "step": 22688 }, { "epoch": 0.8125414077748133, "grad_norm": 2.5941967964172363, "learning_rate": 1.7868974386653336e-05, "loss": 1.4364, "step": 22689 }, { "epoch": 0.8125772199043816, "grad_norm": 1.8890364170074463, "learning_rate": 1.786235791358707e-05, "loss": 1.3582, "step": 22690 }, { "epoch": 0.8126130320339499, "grad_norm": 2.1199588775634766, "learning_rate": 1.7855742545576836e-05, "loss": 1.8747, "step": 22691 }, { "epoch": 0.8126488441635182, "grad_norm": 2.581043004989624, "learning_rate": 1.7849128282711647e-05, "loss": 1.471, "step": 22692 }, { "epoch": 0.8126846562930865, "grad_norm": 1.238418698310852, "learning_rate": 1.784251512508045e-05, "loss": 1.4502, "step": 22693 }, { "epoch": 0.8127204684226548, "grad_norm": 1.6828545331954956, "learning_rate": 1.783590307277223e-05, "loss": 1.8595, "step": 22694 }, { "epoch": 0.812756280552223, "grad_norm": 1.997360348701477, "learning_rate": 1.7829292125875984e-05, "loss": 1.4948, "step": 22695 }, { "epoch": 0.8127920926817913, "grad_norm": 1.4616385698318481, "learning_rate": 1.7822682284480585e-05, "loss": 1.6081, "step": 22696 }, { "epoch": 0.8128279048113596, "grad_norm": 1.9267231225967407, "learning_rate": 1.7816073548675004e-05, "loss": 1.3649, "step": 22697 }, { "epoch": 0.8128637169409278, "grad_norm": 2.3256096839904785, "learning_rate": 1.780946591854814e-05, "loss": 1.3092, "step": 22698 }, { "epoch": 0.8128995290704962, "grad_norm": 1.9210196733474731, "learning_rate": 1.780285939418892e-05, "loss": 1.3377, "step": 22699 }, { "epoch": 0.8129353412000645, "grad_norm": 1.4383782148361206, "learning_rate": 1.7796253975686172e-05, "loss": 1.3547, "step": 22700 }, { "epoch": 0.8129711533296328, "grad_norm": 1.6383898258209229, "learning_rate": 1.7789649663128793e-05, "loss": 1.1577, "step": 22701 }, { "epoch": 0.813006965459201, "grad_norm": 1.306366205215454, "learning_rate": 1.7783046456605658e-05, "loss": 1.4998, "step": 22702 }, { "epoch": 0.8130427775887693, "grad_norm": 1.700914978981018, "learning_rate": 1.7776444356205556e-05, "loss": 1.4648, "step": 22703 }, { "epoch": 0.8130785897183376, "grad_norm": 1.3967289924621582, "learning_rate": 1.7769843362017336e-05, "loss": 1.6431, "step": 22704 }, { "epoch": 0.8131144018479058, "grad_norm": 1.7971333265304565, "learning_rate": 1.77632434741298e-05, "loss": 1.5475, "step": 22705 }, { "epoch": 0.8131502139774742, "grad_norm": 1.5884615182876587, "learning_rate": 1.7756644692631773e-05, "loss": 1.3453, "step": 22706 }, { "epoch": 0.8131860261070425, "grad_norm": 2.3906350135803223, "learning_rate": 1.7750047017611983e-05, "loss": 1.4263, "step": 22707 }, { "epoch": 0.8132218382366108, "grad_norm": 1.615214228630066, "learning_rate": 1.7743450449159217e-05, "loss": 1.0195, "step": 22708 }, { "epoch": 0.813257650366179, "grad_norm": 1.702710509300232, "learning_rate": 1.7736854987362217e-05, "loss": 1.5819, "step": 22709 }, { "epoch": 0.8132934624957473, "grad_norm": 1.4902650117874146, "learning_rate": 1.773026063230975e-05, "loss": 1.6003, "step": 22710 }, { "epoch": 0.8133292746253156, "grad_norm": 1.6640796661376953, "learning_rate": 1.7723667384090503e-05, "loss": 1.5305, "step": 22711 }, { "epoch": 0.8133650867548838, "grad_norm": 1.3970403671264648, "learning_rate": 1.7717075242793123e-05, "loss": 1.4203, "step": 22712 }, { "epoch": 0.8134008988844522, "grad_norm": 1.4592771530151367, "learning_rate": 1.771048420850643e-05, "loss": 1.5402, "step": 22713 }, { "epoch": 0.8134367110140205, "grad_norm": 1.4296540021896362, "learning_rate": 1.770389428131899e-05, "loss": 1.4899, "step": 22714 }, { "epoch": 0.8134725231435888, "grad_norm": 1.440382957458496, "learning_rate": 1.769730546131949e-05, "loss": 1.4909, "step": 22715 }, { "epoch": 0.813508335273157, "grad_norm": 1.3283933401107788, "learning_rate": 1.7690717748596585e-05, "loss": 1.6033, "step": 22716 }, { "epoch": 0.8135441474027253, "grad_norm": 1.8007736206054688, "learning_rate": 1.7684131143238937e-05, "loss": 1.4637, "step": 22717 }, { "epoch": 0.8135799595322936, "grad_norm": 1.7173250913619995, "learning_rate": 1.767754564533509e-05, "loss": 1.4911, "step": 22718 }, { "epoch": 0.8136157716618618, "grad_norm": 1.7633723020553589, "learning_rate": 1.7670961254973682e-05, "loss": 1.6723, "step": 22719 }, { "epoch": 0.8136515837914302, "grad_norm": 2.1998519897460938, "learning_rate": 1.766437797224332e-05, "loss": 1.2689, "step": 22720 }, { "epoch": 0.8136873959209985, "grad_norm": 1.6470333337783813, "learning_rate": 1.7657795797232525e-05, "loss": 1.4681, "step": 22721 }, { "epoch": 0.8137232080505667, "grad_norm": 1.4363399744033813, "learning_rate": 1.7651214730029897e-05, "loss": 1.5119, "step": 22722 }, { "epoch": 0.813759020180135, "grad_norm": 1.6144115924835205, "learning_rate": 1.7644634770723888e-05, "loss": 1.2808, "step": 22723 }, { "epoch": 0.8137948323097033, "grad_norm": 1.3689866065979004, "learning_rate": 1.763805591940315e-05, "loss": 1.2235, "step": 22724 }, { "epoch": 0.8138306444392716, "grad_norm": 1.7141789197921753, "learning_rate": 1.7631478176156113e-05, "loss": 1.2982, "step": 22725 }, { "epoch": 0.8138664565688398, "grad_norm": 1.6982225179672241, "learning_rate": 1.762490154107128e-05, "loss": 1.4998, "step": 22726 }, { "epoch": 0.8139022686984082, "grad_norm": 1.3144981861114502, "learning_rate": 1.761832601423714e-05, "loss": 1.3089, "step": 22727 }, { "epoch": 0.8139380808279765, "grad_norm": 1.2160977125167847, "learning_rate": 1.7611751595742188e-05, "loss": 1.0489, "step": 22728 }, { "epoch": 0.8139738929575447, "grad_norm": 1.6171183586120605, "learning_rate": 1.760517828567484e-05, "loss": 1.6103, "step": 22729 }, { "epoch": 0.814009705087113, "grad_norm": 1.4586262702941895, "learning_rate": 1.759860608412349e-05, "loss": 1.0743, "step": 22730 }, { "epoch": 0.8140455172166813, "grad_norm": 1.4333014488220215, "learning_rate": 1.7592034991176654e-05, "loss": 1.4654, "step": 22731 }, { "epoch": 0.8140813293462495, "grad_norm": 1.6419109106063843, "learning_rate": 1.7585465006922662e-05, "loss": 1.2001, "step": 22732 }, { "epoch": 0.8141171414758178, "grad_norm": 1.2818939685821533, "learning_rate": 1.757889613144995e-05, "loss": 1.3681, "step": 22733 }, { "epoch": 0.8141529536053862, "grad_norm": 1.5382506847381592, "learning_rate": 1.7572328364846836e-05, "loss": 1.4991, "step": 22734 }, { "epoch": 0.8141887657349545, "grad_norm": 1.436592698097229, "learning_rate": 1.7565761707201767e-05, "loss": 1.2363, "step": 22735 }, { "epoch": 0.8142245778645227, "grad_norm": 1.5679230690002441, "learning_rate": 1.7559196158603018e-05, "loss": 1.3993, "step": 22736 }, { "epoch": 0.814260389994091, "grad_norm": 1.6096587181091309, "learning_rate": 1.755263171913897e-05, "loss": 1.2466, "step": 22737 }, { "epoch": 0.8142962021236593, "grad_norm": 1.662288784980774, "learning_rate": 1.7546068388897885e-05, "loss": 1.4613, "step": 22738 }, { "epoch": 0.8143320142532275, "grad_norm": 1.7570534944534302, "learning_rate": 1.753950616796809e-05, "loss": 1.493, "step": 22739 }, { "epoch": 0.8143678263827958, "grad_norm": 1.7655926942825317, "learning_rate": 1.75329450564379e-05, "loss": 1.3305, "step": 22740 }, { "epoch": 0.8144036385123642, "grad_norm": 1.5644776821136475, "learning_rate": 1.75263850543955e-05, "loss": 1.2155, "step": 22741 }, { "epoch": 0.8144394506419325, "grad_norm": 2.489290714263916, "learning_rate": 1.7519826161929266e-05, "loss": 1.6175, "step": 22742 }, { "epoch": 0.8144752627715007, "grad_norm": 1.4218047857284546, "learning_rate": 1.7513268379127356e-05, "loss": 1.5427, "step": 22743 }, { "epoch": 0.814511074901069, "grad_norm": 1.5447837114334106, "learning_rate": 1.750671170607804e-05, "loss": 1.0916, "step": 22744 }, { "epoch": 0.8145468870306373, "grad_norm": 1.4648761749267578, "learning_rate": 1.7500156142869462e-05, "loss": 1.311, "step": 22745 }, { "epoch": 0.8145826991602055, "grad_norm": 1.8207685947418213, "learning_rate": 1.7493601689589913e-05, "loss": 1.169, "step": 22746 }, { "epoch": 0.8146185112897738, "grad_norm": 1.640710711479187, "learning_rate": 1.748704834632753e-05, "loss": 1.5011, "step": 22747 }, { "epoch": 0.8146543234193422, "grad_norm": 1.3525753021240234, "learning_rate": 1.748049611317045e-05, "loss": 1.5688, "step": 22748 }, { "epoch": 0.8146901355489105, "grad_norm": 1.8010128736495972, "learning_rate": 1.7473944990206858e-05, "loss": 1.1804, "step": 22749 }, { "epoch": 0.8147259476784787, "grad_norm": 1.575965404510498, "learning_rate": 1.7467394977524876e-05, "loss": 1.385, "step": 22750 }, { "epoch": 0.814761759808047, "grad_norm": 1.5879836082458496, "learning_rate": 1.7460846075212666e-05, "loss": 1.0562, "step": 22751 }, { "epoch": 0.8147975719376153, "grad_norm": 1.2580584287643433, "learning_rate": 1.745429828335826e-05, "loss": 1.467, "step": 22752 }, { "epoch": 0.8148333840671835, "grad_norm": 1.7832731008529663, "learning_rate": 1.7447751602049835e-05, "loss": 1.1617, "step": 22753 }, { "epoch": 0.8148691961967518, "grad_norm": 1.6994675397872925, "learning_rate": 1.74412060313754e-05, "loss": 1.5291, "step": 22754 }, { "epoch": 0.8149050083263202, "grad_norm": 1.608042597770691, "learning_rate": 1.7434661571423084e-05, "loss": 1.4662, "step": 22755 }, { "epoch": 0.8149408204558884, "grad_norm": 1.4791151285171509, "learning_rate": 1.7428118222280855e-05, "loss": 1.4369, "step": 22756 }, { "epoch": 0.8149766325854567, "grad_norm": 1.5534361600875854, "learning_rate": 1.7421575984036797e-05, "loss": 1.4834, "step": 22757 }, { "epoch": 0.815012444715025, "grad_norm": 1.302477240562439, "learning_rate": 1.7415034856778934e-05, "loss": 1.4125, "step": 22758 }, { "epoch": 0.8150482568445933, "grad_norm": 1.541063666343689, "learning_rate": 1.7408494840595224e-05, "loss": 1.2171, "step": 22759 }, { "epoch": 0.8150840689741615, "grad_norm": 2.021052122116089, "learning_rate": 1.7401955935573688e-05, "loss": 1.6671, "step": 22760 }, { "epoch": 0.8151198811037298, "grad_norm": 2.089958429336548, "learning_rate": 1.739541814180228e-05, "loss": 1.5129, "step": 22761 }, { "epoch": 0.8151556932332982, "grad_norm": 1.526520848274231, "learning_rate": 1.7388881459369e-05, "loss": 1.4541, "step": 22762 }, { "epoch": 0.8151915053628664, "grad_norm": 1.9976831674575806, "learning_rate": 1.738234588836174e-05, "loss": 1.3426, "step": 22763 }, { "epoch": 0.8152273174924347, "grad_norm": 1.4353028535842896, "learning_rate": 1.737581142886844e-05, "loss": 1.292, "step": 22764 }, { "epoch": 0.815263129622003, "grad_norm": 1.6733012199401855, "learning_rate": 1.7369278080977037e-05, "loss": 1.4501, "step": 22765 }, { "epoch": 0.8152989417515712, "grad_norm": 1.6345852613449097, "learning_rate": 1.7362745844775396e-05, "loss": 1.4787, "step": 22766 }, { "epoch": 0.8153347538811395, "grad_norm": 1.6095476150512695, "learning_rate": 1.7356214720351414e-05, "loss": 1.1239, "step": 22767 }, { "epoch": 0.8153705660107078, "grad_norm": 2.355900526046753, "learning_rate": 1.7349684707792956e-05, "loss": 1.4855, "step": 22768 }, { "epoch": 0.8154063781402762, "grad_norm": 1.734176516532898, "learning_rate": 1.7343155807187915e-05, "loss": 1.3749, "step": 22769 }, { "epoch": 0.8154421902698444, "grad_norm": 2.209763765335083, "learning_rate": 1.7336628018624058e-05, "loss": 1.4435, "step": 22770 }, { "epoch": 0.8154780023994127, "grad_norm": 1.8333766460418701, "learning_rate": 1.7330101342189254e-05, "loss": 1.4678, "step": 22771 }, { "epoch": 0.815513814528981, "grad_norm": 2.5825207233428955, "learning_rate": 1.732357577797129e-05, "loss": 1.0537, "step": 22772 }, { "epoch": 0.8155496266585492, "grad_norm": 1.7145079374313354, "learning_rate": 1.7317051326057998e-05, "loss": 1.4113, "step": 22773 }, { "epoch": 0.8155854387881175, "grad_norm": 1.4004257917404175, "learning_rate": 1.7310527986537095e-05, "loss": 1.4613, "step": 22774 }, { "epoch": 0.8156212509176858, "grad_norm": 1.3757426738739014, "learning_rate": 1.7304005759496377e-05, "loss": 1.5297, "step": 22775 }, { "epoch": 0.815657063047254, "grad_norm": 1.3896691799163818, "learning_rate": 1.729748464502362e-05, "loss": 1.3216, "step": 22776 }, { "epoch": 0.8156928751768224, "grad_norm": 1.7896462678909302, "learning_rate": 1.729096464320651e-05, "loss": 1.4618, "step": 22777 }, { "epoch": 0.8157286873063907, "grad_norm": 1.3658167123794556, "learning_rate": 1.7284445754132772e-05, "loss": 1.6294, "step": 22778 }, { "epoch": 0.815764499435959, "grad_norm": 1.7124221324920654, "learning_rate": 1.727792797789013e-05, "loss": 1.4627, "step": 22779 }, { "epoch": 0.8158003115655272, "grad_norm": 1.5317054986953735, "learning_rate": 1.7271411314566287e-05, "loss": 1.4144, "step": 22780 }, { "epoch": 0.8158361236950955, "grad_norm": 1.3445594310760498, "learning_rate": 1.7264895764248868e-05, "loss": 1.4471, "step": 22781 }, { "epoch": 0.8158719358246638, "grad_norm": 1.3334885835647583, "learning_rate": 1.7258381327025552e-05, "loss": 1.3444, "step": 22782 }, { "epoch": 0.815907747954232, "grad_norm": 1.8246225118637085, "learning_rate": 1.7251868002984005e-05, "loss": 1.5161, "step": 22783 }, { "epoch": 0.8159435600838004, "grad_norm": 1.5593852996826172, "learning_rate": 1.7245355792211826e-05, "loss": 1.6261, "step": 22784 }, { "epoch": 0.8159793722133687, "grad_norm": 1.728540301322937, "learning_rate": 1.723884469479663e-05, "loss": 1.3071, "step": 22785 }, { "epoch": 0.816015184342937, "grad_norm": 1.4976931810379028, "learning_rate": 1.7232334710826025e-05, "loss": 1.4344, "step": 22786 }, { "epoch": 0.8160509964725052, "grad_norm": 1.657505989074707, "learning_rate": 1.722582584038762e-05, "loss": 1.5328, "step": 22787 }, { "epoch": 0.8160868086020735, "grad_norm": 1.5885651111602783, "learning_rate": 1.7219318083568937e-05, "loss": 1.1093, "step": 22788 }, { "epoch": 0.8161226207316418, "grad_norm": 1.4087108373641968, "learning_rate": 1.7212811440457545e-05, "loss": 1.2369, "step": 22789 }, { "epoch": 0.81615843286121, "grad_norm": 1.7424745559692383, "learning_rate": 1.7206305911141017e-05, "loss": 1.5162, "step": 22790 }, { "epoch": 0.8161942449907784, "grad_norm": 1.5498261451721191, "learning_rate": 1.7199801495706812e-05, "loss": 1.087, "step": 22791 }, { "epoch": 0.8162300571203467, "grad_norm": 1.4619683027267456, "learning_rate": 1.719329819424248e-05, "loss": 1.4748, "step": 22792 }, { "epoch": 0.816265869249915, "grad_norm": 1.8188879489898682, "learning_rate": 1.7186796006835514e-05, "loss": 1.5167, "step": 22793 }, { "epoch": 0.8163016813794832, "grad_norm": 1.7282185554504395, "learning_rate": 1.7180294933573405e-05, "loss": 1.5527, "step": 22794 }, { "epoch": 0.8163374935090515, "grad_norm": 1.842283844947815, "learning_rate": 1.7173794974543568e-05, "loss": 1.3138, "step": 22795 }, { "epoch": 0.8163733056386198, "grad_norm": 2.1575214862823486, "learning_rate": 1.7167296129833488e-05, "loss": 1.7364, "step": 22796 }, { "epoch": 0.816409117768188, "grad_norm": 1.707844614982605, "learning_rate": 1.7160798399530586e-05, "loss": 1.282, "step": 22797 }, { "epoch": 0.8164449298977564, "grad_norm": 1.5645873546600342, "learning_rate": 1.7154301783722315e-05, "loss": 1.1909, "step": 22798 }, { "epoch": 0.8164807420273247, "grad_norm": 1.1664835214614868, "learning_rate": 1.7147806282496027e-05, "loss": 1.4765, "step": 22799 }, { "epoch": 0.816516554156893, "grad_norm": 1.5875605344772339, "learning_rate": 1.7141311895939137e-05, "loss": 1.5326, "step": 22800 }, { "epoch": 0.8165523662864612, "grad_norm": 1.8210203647613525, "learning_rate": 1.7134818624139036e-05, "loss": 1.204, "step": 22801 }, { "epoch": 0.8165881784160295, "grad_norm": 1.5193594694137573, "learning_rate": 1.7128326467183032e-05, "loss": 1.2251, "step": 22802 }, { "epoch": 0.8166239905455978, "grad_norm": 1.719142198562622, "learning_rate": 1.7121835425158506e-05, "loss": 1.6163, "step": 22803 }, { "epoch": 0.816659802675166, "grad_norm": 1.998476266860962, "learning_rate": 1.711534549815278e-05, "loss": 1.3592, "step": 22804 }, { "epoch": 0.8166956148047344, "grad_norm": 1.435386300086975, "learning_rate": 1.7108856686253183e-05, "loss": 1.2512, "step": 22805 }, { "epoch": 0.8167314269343027, "grad_norm": 1.4732717275619507, "learning_rate": 1.710236898954698e-05, "loss": 1.3226, "step": 22806 }, { "epoch": 0.8167672390638709, "grad_norm": 1.7154697179794312, "learning_rate": 1.7095882408121468e-05, "loss": 1.2685, "step": 22807 }, { "epoch": 0.8168030511934392, "grad_norm": 1.3846417665481567, "learning_rate": 1.708939694206395e-05, "loss": 1.543, "step": 22808 }, { "epoch": 0.8168388633230075, "grad_norm": 1.680985689163208, "learning_rate": 1.708291259146162e-05, "loss": 1.3678, "step": 22809 }, { "epoch": 0.8168746754525757, "grad_norm": 1.3640410900115967, "learning_rate": 1.7076429356401748e-05, "loss": 1.5972, "step": 22810 }, { "epoch": 0.816910487582144, "grad_norm": 1.7066844701766968, "learning_rate": 1.706994723697155e-05, "loss": 1.1552, "step": 22811 }, { "epoch": 0.8169462997117124, "grad_norm": 2.2980430126190186, "learning_rate": 1.7063466233258275e-05, "loss": 1.3957, "step": 22812 }, { "epoch": 0.8169821118412807, "grad_norm": 1.7297414541244507, "learning_rate": 1.7056986345349046e-05, "loss": 1.5695, "step": 22813 }, { "epoch": 0.8170179239708489, "grad_norm": 2.15028977394104, "learning_rate": 1.7050507573331077e-05, "loss": 1.2113, "step": 22814 }, { "epoch": 0.8170537361004172, "grad_norm": 1.3350656032562256, "learning_rate": 1.7044029917291536e-05, "loss": 1.1719, "step": 22815 }, { "epoch": 0.8170895482299855, "grad_norm": 1.7217737436294556, "learning_rate": 1.7037553377317595e-05, "loss": 1.2585, "step": 22816 }, { "epoch": 0.8171253603595537, "grad_norm": 1.6362788677215576, "learning_rate": 1.7031077953496356e-05, "loss": 1.433, "step": 22817 }, { "epoch": 0.817161172489122, "grad_norm": 1.7198362350463867, "learning_rate": 1.7024603645914896e-05, "loss": 1.3631, "step": 22818 }, { "epoch": 0.8171969846186904, "grad_norm": 1.4463917016983032, "learning_rate": 1.7018130454660395e-05, "loss": 1.486, "step": 22819 }, { "epoch": 0.8172327967482587, "grad_norm": 2.732374906539917, "learning_rate": 1.7011658379819904e-05, "loss": 1.2624, "step": 22820 }, { "epoch": 0.8172686088778269, "grad_norm": 1.4441410303115845, "learning_rate": 1.7005187421480517e-05, "loss": 1.2175, "step": 22821 }, { "epoch": 0.8173044210073952, "grad_norm": 1.7178325653076172, "learning_rate": 1.699871757972924e-05, "loss": 1.5518, "step": 22822 }, { "epoch": 0.8173402331369635, "grad_norm": 2.081376075744629, "learning_rate": 1.6992248854653192e-05, "loss": 1.4921, "step": 22823 }, { "epoch": 0.8173760452665317, "grad_norm": 2.20284104347229, "learning_rate": 1.698578124633934e-05, "loss": 1.6761, "step": 22824 }, { "epoch": 0.8174118573961, "grad_norm": 1.9613070487976074, "learning_rate": 1.6979314754874733e-05, "loss": 1.6807, "step": 22825 }, { "epoch": 0.8174476695256684, "grad_norm": 1.3908569812774658, "learning_rate": 1.6972849380346367e-05, "loss": 1.3088, "step": 22826 }, { "epoch": 0.8174834816552367, "grad_norm": 1.7931389808654785, "learning_rate": 1.696638512284119e-05, "loss": 1.5472, "step": 22827 }, { "epoch": 0.8175192937848049, "grad_norm": 2.0211594104766846, "learning_rate": 1.6959921982446225e-05, "loss": 1.6009, "step": 22828 }, { "epoch": 0.8175551059143732, "grad_norm": 1.730381965637207, "learning_rate": 1.6953459959248354e-05, "loss": 1.2824, "step": 22829 }, { "epoch": 0.8175909180439415, "grad_norm": 1.6810767650604248, "learning_rate": 1.69469990533346e-05, "loss": 1.3071, "step": 22830 }, { "epoch": 0.8176267301735097, "grad_norm": 1.639445185661316, "learning_rate": 1.694053926479181e-05, "loss": 1.2268, "step": 22831 }, { "epoch": 0.817662542303078, "grad_norm": 1.8805431127548218, "learning_rate": 1.6934080593706958e-05, "loss": 1.2137, "step": 22832 }, { "epoch": 0.8176983544326464, "grad_norm": 1.8979594707489014, "learning_rate": 1.692762304016685e-05, "loss": 1.4345, "step": 22833 }, { "epoch": 0.8177341665622146, "grad_norm": 1.65707528591156, "learning_rate": 1.6921166604258475e-05, "loss": 1.3987, "step": 22834 }, { "epoch": 0.8177699786917829, "grad_norm": 1.6859840154647827, "learning_rate": 1.691471128606864e-05, "loss": 1.6425, "step": 22835 }, { "epoch": 0.8178057908213512, "grad_norm": 1.899539589881897, "learning_rate": 1.6908257085684143e-05, "loss": 1.5508, "step": 22836 }, { "epoch": 0.8178416029509195, "grad_norm": 1.8419172763824463, "learning_rate": 1.6901804003191914e-05, "loss": 1.3642, "step": 22837 }, { "epoch": 0.8178774150804877, "grad_norm": 1.5693961381912231, "learning_rate": 1.6895352038678692e-05, "loss": 1.6069, "step": 22838 }, { "epoch": 0.817913227210056, "grad_norm": 1.3967636823654175, "learning_rate": 1.6888901192231342e-05, "loss": 1.4029, "step": 22839 }, { "epoch": 0.8179490393396244, "grad_norm": 1.2297767400741577, "learning_rate": 1.6882451463936566e-05, "loss": 1.2852, "step": 22840 }, { "epoch": 0.8179848514691926, "grad_norm": 1.8269104957580566, "learning_rate": 1.6876002853881244e-05, "loss": 1.4348, "step": 22841 }, { "epoch": 0.8180206635987609, "grad_norm": 1.6397745609283447, "learning_rate": 1.6869555362152056e-05, "loss": 1.2093, "step": 22842 }, { "epoch": 0.8180564757283292, "grad_norm": 1.4336014986038208, "learning_rate": 1.6863108988835797e-05, "loss": 1.1987, "step": 22843 }, { "epoch": 0.8180922878578974, "grad_norm": 2.0080535411834717, "learning_rate": 1.685666373401914e-05, "loss": 1.517, "step": 22844 }, { "epoch": 0.8181280999874657, "grad_norm": 1.9427084922790527, "learning_rate": 1.685021959778883e-05, "loss": 1.4457, "step": 22845 }, { "epoch": 0.818163912117034, "grad_norm": 1.9075568914413452, "learning_rate": 1.6843776580231586e-05, "loss": 1.2082, "step": 22846 }, { "epoch": 0.8181997242466024, "grad_norm": 1.3768006563186646, "learning_rate": 1.6837334681434037e-05, "loss": 1.3797, "step": 22847 }, { "epoch": 0.8182355363761706, "grad_norm": 2.21150541305542, "learning_rate": 1.683089390148287e-05, "loss": 1.6664, "step": 22848 }, { "epoch": 0.8182713485057389, "grad_norm": 2.0442562103271484, "learning_rate": 1.6824454240464748e-05, "loss": 1.4715, "step": 22849 }, { "epoch": 0.8183071606353072, "grad_norm": 1.8075312376022339, "learning_rate": 1.6818015698466338e-05, "loss": 1.2415, "step": 22850 }, { "epoch": 0.8183429727648754, "grad_norm": 1.6639543771743774, "learning_rate": 1.681157827557418e-05, "loss": 1.3192, "step": 22851 }, { "epoch": 0.8183787848944437, "grad_norm": 1.579306960105896, "learning_rate": 1.680514197187497e-05, "loss": 1.5554, "step": 22852 }, { "epoch": 0.818414597024012, "grad_norm": 1.3775279521942139, "learning_rate": 1.6798706787455264e-05, "loss": 1.4963, "step": 22853 }, { "epoch": 0.8184504091535804, "grad_norm": 2.1027517318725586, "learning_rate": 1.6792272722401626e-05, "loss": 1.2141, "step": 22854 }, { "epoch": 0.8184862212831486, "grad_norm": 1.735244870185852, "learning_rate": 1.6785839776800615e-05, "loss": 1.4396, "step": 22855 }, { "epoch": 0.8185220334127169, "grad_norm": 1.4876716136932373, "learning_rate": 1.677940795073879e-05, "loss": 1.7742, "step": 22856 }, { "epoch": 0.8185578455422852, "grad_norm": 1.8125195503234863, "learning_rate": 1.6772977244302714e-05, "loss": 1.6015, "step": 22857 }, { "epoch": 0.8185936576718534, "grad_norm": 1.8210275173187256, "learning_rate": 1.6766547657578844e-05, "loss": 1.4319, "step": 22858 }, { "epoch": 0.8186294698014217, "grad_norm": 1.593743085861206, "learning_rate": 1.6760119190653724e-05, "loss": 1.2788, "step": 22859 }, { "epoch": 0.81866528193099, "grad_norm": 1.4638431072235107, "learning_rate": 1.6753691843613818e-05, "loss": 1.3581, "step": 22860 }, { "epoch": 0.8187010940605584, "grad_norm": 1.6058062314987183, "learning_rate": 1.6747265616545625e-05, "loss": 1.0587, "step": 22861 }, { "epoch": 0.8187369061901266, "grad_norm": 1.6514182090759277, "learning_rate": 1.674084050953557e-05, "loss": 1.288, "step": 22862 }, { "epoch": 0.8187727183196949, "grad_norm": 1.6830849647521973, "learning_rate": 1.6734416522670114e-05, "loss": 1.466, "step": 22863 }, { "epoch": 0.8188085304492632, "grad_norm": 1.5081751346588135, "learning_rate": 1.6727993656035702e-05, "loss": 1.1591, "step": 22864 }, { "epoch": 0.8188443425788314, "grad_norm": 1.801693081855774, "learning_rate": 1.672157190971869e-05, "loss": 1.4321, "step": 22865 }, { "epoch": 0.8188801547083997, "grad_norm": 1.6656297445297241, "learning_rate": 1.671515128380551e-05, "loss": 1.4196, "step": 22866 }, { "epoch": 0.818915966837968, "grad_norm": 1.7084009647369385, "learning_rate": 1.6708731778382546e-05, "loss": 1.4801, "step": 22867 }, { "epoch": 0.8189517789675363, "grad_norm": 1.5573643445968628, "learning_rate": 1.6702313393536173e-05, "loss": 1.4912, "step": 22868 }, { "epoch": 0.8189875910971046, "grad_norm": 1.7359768152236938, "learning_rate": 1.6695896129352705e-05, "loss": 1.6653, "step": 22869 }, { "epoch": 0.8190234032266729, "grad_norm": 1.4117817878723145, "learning_rate": 1.66894799859185e-05, "loss": 1.2851, "step": 22870 }, { "epoch": 0.8190592153562412, "grad_norm": 1.3178173303604126, "learning_rate": 1.6683064963319906e-05, "loss": 0.9814, "step": 22871 }, { "epoch": 0.8190950274858094, "grad_norm": 1.5659393072128296, "learning_rate": 1.6676651061643177e-05, "loss": 1.4266, "step": 22872 }, { "epoch": 0.8191308396153777, "grad_norm": 1.5842785835266113, "learning_rate": 1.6670238280974627e-05, "loss": 1.5485, "step": 22873 }, { "epoch": 0.819166651744946, "grad_norm": 1.5451146364212036, "learning_rate": 1.6663826621400537e-05, "loss": 1.4251, "step": 22874 }, { "epoch": 0.8192024638745143, "grad_norm": 1.8988233804702759, "learning_rate": 1.6657416083007184e-05, "loss": 1.5944, "step": 22875 }, { "epoch": 0.8192382760040826, "grad_norm": 1.5368101596832275, "learning_rate": 1.6651006665880776e-05, "loss": 1.3794, "step": 22876 }, { "epoch": 0.8192740881336509, "grad_norm": 3.667703628540039, "learning_rate": 1.6644598370107554e-05, "loss": 1.618, "step": 22877 }, { "epoch": 0.8193099002632191, "grad_norm": 1.5384092330932617, "learning_rate": 1.6638191195773744e-05, "loss": 1.3134, "step": 22878 }, { "epoch": 0.8193457123927874, "grad_norm": 1.6481189727783203, "learning_rate": 1.6631785142965563e-05, "loss": 1.5524, "step": 22879 }, { "epoch": 0.8193815245223557, "grad_norm": 1.4624632596969604, "learning_rate": 1.6625380211769147e-05, "loss": 1.3622, "step": 22880 }, { "epoch": 0.819417336651924, "grad_norm": 1.7683665752410889, "learning_rate": 1.6618976402270704e-05, "loss": 1.4516, "step": 22881 }, { "epoch": 0.8194531487814923, "grad_norm": 1.6138049364089966, "learning_rate": 1.66125737145564e-05, "loss": 1.5001, "step": 22882 }, { "epoch": 0.8194889609110606, "grad_norm": 1.509700059890747, "learning_rate": 1.6606172148712328e-05, "loss": 1.4476, "step": 22883 }, { "epoch": 0.8195247730406289, "grad_norm": 1.956424593925476, "learning_rate": 1.659977170482464e-05, "loss": 1.2311, "step": 22884 }, { "epoch": 0.8195605851701971, "grad_norm": 1.6657973527908325, "learning_rate": 1.6593372382979455e-05, "loss": 1.3247, "step": 22885 }, { "epoch": 0.8195963972997654, "grad_norm": 1.5629335641860962, "learning_rate": 1.658697418326287e-05, "loss": 1.169, "step": 22886 }, { "epoch": 0.8196322094293337, "grad_norm": 1.9260749816894531, "learning_rate": 1.658057710576093e-05, "loss": 1.5238, "step": 22887 }, { "epoch": 0.819668021558902, "grad_norm": 1.5545670986175537, "learning_rate": 1.657418115055973e-05, "loss": 1.4566, "step": 22888 }, { "epoch": 0.8197038336884703, "grad_norm": 2.068680763244629, "learning_rate": 1.6567786317745327e-05, "loss": 1.4548, "step": 22889 }, { "epoch": 0.8197396458180386, "grad_norm": 1.982734203338623, "learning_rate": 1.6561392607403713e-05, "loss": 1.3612, "step": 22890 }, { "epoch": 0.8197754579476069, "grad_norm": 1.426444172859192, "learning_rate": 1.655500001962095e-05, "loss": 1.4532, "step": 22891 }, { "epoch": 0.8198112700771751, "grad_norm": 1.4528121948242188, "learning_rate": 1.6548608554483e-05, "loss": 1.4581, "step": 22892 }, { "epoch": 0.8198470822067434, "grad_norm": 1.5887699127197266, "learning_rate": 1.6542218212075923e-05, "loss": 1.633, "step": 22893 }, { "epoch": 0.8198828943363117, "grad_norm": 1.5253174304962158, "learning_rate": 1.6535828992485613e-05, "loss": 1.286, "step": 22894 }, { "epoch": 0.8199187064658799, "grad_norm": 1.6747887134552002, "learning_rate": 1.6529440895798065e-05, "loss": 1.3259, "step": 22895 }, { "epoch": 0.8199545185954483, "grad_norm": 1.4363501071929932, "learning_rate": 1.6523053922099242e-05, "loss": 1.3707, "step": 22896 }, { "epoch": 0.8199903307250166, "grad_norm": 1.5821083784103394, "learning_rate": 1.651666807147503e-05, "loss": 1.4022, "step": 22897 }, { "epoch": 0.8200261428545849, "grad_norm": 1.7014988660812378, "learning_rate": 1.651028334401137e-05, "loss": 1.6497, "step": 22898 }, { "epoch": 0.8200619549841531, "grad_norm": 1.6083884239196777, "learning_rate": 1.6503899739794138e-05, "loss": 1.8573, "step": 22899 }, { "epoch": 0.8200977671137214, "grad_norm": 1.4160634279251099, "learning_rate": 1.6497517258909267e-05, "loss": 1.4247, "step": 22900 }, { "epoch": 0.8201335792432897, "grad_norm": 1.8023004531860352, "learning_rate": 1.6491135901442567e-05, "loss": 1.4982, "step": 22901 }, { "epoch": 0.8201693913728579, "grad_norm": 1.3858096599578857, "learning_rate": 1.648475566747991e-05, "loss": 1.4687, "step": 22902 }, { "epoch": 0.8202052035024263, "grad_norm": 3.3740923404693604, "learning_rate": 1.6478376557107145e-05, "loss": 1.4911, "step": 22903 }, { "epoch": 0.8202410156319946, "grad_norm": 1.7053208351135254, "learning_rate": 1.647199857041011e-05, "loss": 1.6013, "step": 22904 }, { "epoch": 0.8202768277615629, "grad_norm": 1.5999023914337158, "learning_rate": 1.6465621707474587e-05, "loss": 1.5928, "step": 22905 }, { "epoch": 0.8203126398911311, "grad_norm": 2.348151206970215, "learning_rate": 1.6459245968386327e-05, "loss": 1.6773, "step": 22906 }, { "epoch": 0.8203484520206994, "grad_norm": 1.8074836730957031, "learning_rate": 1.64528713532312e-05, "loss": 1.5876, "step": 22907 }, { "epoch": 0.8203842641502677, "grad_norm": 1.8491315841674805, "learning_rate": 1.64464978620949e-05, "loss": 1.4896, "step": 22908 }, { "epoch": 0.8204200762798359, "grad_norm": 1.720956563949585, "learning_rate": 1.6440125495063185e-05, "loss": 1.3682, "step": 22909 }, { "epoch": 0.8204558884094043, "grad_norm": 1.3919970989227295, "learning_rate": 1.643375425222181e-05, "loss": 1.6958, "step": 22910 }, { "epoch": 0.8204917005389726, "grad_norm": 1.6565746068954468, "learning_rate": 1.6427384133656498e-05, "loss": 1.1266, "step": 22911 }, { "epoch": 0.8205275126685408, "grad_norm": 1.7313103675842285, "learning_rate": 1.64210151394529e-05, "loss": 1.4588, "step": 22912 }, { "epoch": 0.8205633247981091, "grad_norm": 1.5546603202819824, "learning_rate": 1.641464726969675e-05, "loss": 1.3061, "step": 22913 }, { "epoch": 0.8205991369276774, "grad_norm": 1.9309124946594238, "learning_rate": 1.6408280524473706e-05, "loss": 1.6808, "step": 22914 }, { "epoch": 0.8206349490572457, "grad_norm": 1.3096654415130615, "learning_rate": 1.640191490386942e-05, "loss": 1.4659, "step": 22915 }, { "epoch": 0.8206707611868139, "grad_norm": 1.6897329092025757, "learning_rate": 1.6395550407969552e-05, "loss": 1.2159, "step": 22916 }, { "epoch": 0.8207065733163823, "grad_norm": 1.5496948957443237, "learning_rate": 1.6389187036859655e-05, "loss": 1.4833, "step": 22917 }, { "epoch": 0.8207423854459506, "grad_norm": 2.1769845485687256, "learning_rate": 1.638282479062545e-05, "loss": 1.7208, "step": 22918 }, { "epoch": 0.8207781975755188, "grad_norm": 1.4986789226531982, "learning_rate": 1.637646366935246e-05, "loss": 1.3027, "step": 22919 }, { "epoch": 0.8208140097050871, "grad_norm": 1.4664111137390137, "learning_rate": 1.6370103673126267e-05, "loss": 1.5531, "step": 22920 }, { "epoch": 0.8208498218346554, "grad_norm": 1.5579311847686768, "learning_rate": 1.6363744802032476e-05, "loss": 1.5757, "step": 22921 }, { "epoch": 0.8208856339642236, "grad_norm": 1.549301266670227, "learning_rate": 1.6357387056156626e-05, "loss": 1.5303, "step": 22922 }, { "epoch": 0.8209214460937919, "grad_norm": 1.373755931854248, "learning_rate": 1.6351030435584245e-05, "loss": 1.5735, "step": 22923 }, { "epoch": 0.8209572582233603, "grad_norm": 1.8269177675247192, "learning_rate": 1.6344674940400805e-05, "loss": 1.607, "step": 22924 }, { "epoch": 0.8209930703529286, "grad_norm": 1.6167367696762085, "learning_rate": 1.633832057069191e-05, "loss": 1.2946, "step": 22925 }, { "epoch": 0.8210288824824968, "grad_norm": 1.9242609739303589, "learning_rate": 1.6331967326542963e-05, "loss": 1.5833, "step": 22926 }, { "epoch": 0.8210646946120651, "grad_norm": 1.805993676185608, "learning_rate": 1.63256152080395e-05, "loss": 1.2707, "step": 22927 }, { "epoch": 0.8211005067416334, "grad_norm": 1.4096330404281616, "learning_rate": 1.6319264215266894e-05, "loss": 1.2395, "step": 22928 }, { "epoch": 0.8211363188712016, "grad_norm": 1.5715415477752686, "learning_rate": 1.6312914348310704e-05, "loss": 1.3947, "step": 22929 }, { "epoch": 0.8211721310007699, "grad_norm": 1.2842967510223389, "learning_rate": 1.6306565607256285e-05, "loss": 1.0019, "step": 22930 }, { "epoch": 0.8212079431303383, "grad_norm": 1.9133511781692505, "learning_rate": 1.6300217992189082e-05, "loss": 1.8043, "step": 22931 }, { "epoch": 0.8212437552599066, "grad_norm": 1.7129391431808472, "learning_rate": 1.6293871503194458e-05, "loss": 1.3144, "step": 22932 }, { "epoch": 0.8212795673894748, "grad_norm": 1.822322130203247, "learning_rate": 1.6287526140357822e-05, "loss": 1.4964, "step": 22933 }, { "epoch": 0.8213153795190431, "grad_norm": 1.658325433731079, "learning_rate": 1.6281181903764565e-05, "loss": 1.5703, "step": 22934 }, { "epoch": 0.8213511916486114, "grad_norm": 1.6791235208511353, "learning_rate": 1.627483879349997e-05, "loss": 1.4307, "step": 22935 }, { "epoch": 0.8213870037781796, "grad_norm": 1.326132893562317, "learning_rate": 1.626849680964947e-05, "loss": 1.3838, "step": 22936 }, { "epoch": 0.8214228159077479, "grad_norm": 1.37123441696167, "learning_rate": 1.6262155952298307e-05, "loss": 1.1731, "step": 22937 }, { "epoch": 0.8214586280373163, "grad_norm": 1.4056603908538818, "learning_rate": 1.625581622153186e-05, "loss": 1.4067, "step": 22938 }, { "epoch": 0.8214944401668846, "grad_norm": 1.6344373226165771, "learning_rate": 1.6249477617435327e-05, "loss": 1.4917, "step": 22939 }, { "epoch": 0.8215302522964528, "grad_norm": 1.7621803283691406, "learning_rate": 1.6243140140094093e-05, "loss": 1.5295, "step": 22940 }, { "epoch": 0.8215660644260211, "grad_norm": 1.3223881721496582, "learning_rate": 1.6236803789593368e-05, "loss": 1.452, "step": 22941 }, { "epoch": 0.8216018765555894, "grad_norm": 1.669700026512146, "learning_rate": 1.6230468566018375e-05, "loss": 1.4759, "step": 22942 }, { "epoch": 0.8216376886851576, "grad_norm": 1.5705947875976562, "learning_rate": 1.6224134469454366e-05, "loss": 1.4296, "step": 22943 }, { "epoch": 0.8216735008147259, "grad_norm": 1.8033430576324463, "learning_rate": 1.6217801499986573e-05, "loss": 1.5341, "step": 22944 }, { "epoch": 0.8217093129442943, "grad_norm": 1.3500990867614746, "learning_rate": 1.6211469657700217e-05, "loss": 1.4331, "step": 22945 }, { "epoch": 0.8217451250738625, "grad_norm": 1.8312416076660156, "learning_rate": 1.6205138942680408e-05, "loss": 1.6042, "step": 22946 }, { "epoch": 0.8217809372034308, "grad_norm": 2.076237201690674, "learning_rate": 1.6198809355012412e-05, "loss": 1.4416, "step": 22947 }, { "epoch": 0.8218167493329991, "grad_norm": 2.3092427253723145, "learning_rate": 1.6192480894781316e-05, "loss": 1.4371, "step": 22948 }, { "epoch": 0.8218525614625674, "grad_norm": 1.5285634994506836, "learning_rate": 1.6186153562072316e-05, "loss": 1.6155, "step": 22949 }, { "epoch": 0.8218883735921356, "grad_norm": 1.3765846490859985, "learning_rate": 1.617982735697048e-05, "loss": 1.0938, "step": 22950 }, { "epoch": 0.8219241857217039, "grad_norm": 1.4085685014724731, "learning_rate": 1.6173502279560936e-05, "loss": 1.4514, "step": 22951 }, { "epoch": 0.8219599978512723, "grad_norm": 1.2525451183319092, "learning_rate": 1.6167178329928823e-05, "loss": 1.2433, "step": 22952 }, { "epoch": 0.8219958099808405, "grad_norm": 1.4547854661941528, "learning_rate": 1.6160855508159168e-05, "loss": 1.2984, "step": 22953 }, { "epoch": 0.8220316221104088, "grad_norm": 1.432128667831421, "learning_rate": 1.6154533814337058e-05, "loss": 1.5026, "step": 22954 }, { "epoch": 0.8220674342399771, "grad_norm": 1.9972702264785767, "learning_rate": 1.614821324854754e-05, "loss": 1.4845, "step": 22955 }, { "epoch": 0.8221032463695453, "grad_norm": 1.6977585554122925, "learning_rate": 1.6141893810875675e-05, "loss": 1.6666, "step": 22956 }, { "epoch": 0.8221390584991136, "grad_norm": 1.9989867210388184, "learning_rate": 1.6135575501406432e-05, "loss": 1.7976, "step": 22957 }, { "epoch": 0.8221748706286819, "grad_norm": 1.8499654531478882, "learning_rate": 1.6129258320224848e-05, "loss": 1.3219, "step": 22958 }, { "epoch": 0.8222106827582503, "grad_norm": 2.332317352294922, "learning_rate": 1.612294226741593e-05, "loss": 1.4394, "step": 22959 }, { "epoch": 0.8222464948878185, "grad_norm": 2.4020214080810547, "learning_rate": 1.6116627343064605e-05, "loss": 1.7889, "step": 22960 }, { "epoch": 0.8222823070173868, "grad_norm": 2.036348342895508, "learning_rate": 1.611031354725586e-05, "loss": 1.3781, "step": 22961 }, { "epoch": 0.8223181191469551, "grad_norm": 2.3071060180664062, "learning_rate": 1.6104000880074642e-05, "loss": 1.5629, "step": 22962 }, { "epoch": 0.8223539312765233, "grad_norm": 1.9471147060394287, "learning_rate": 1.6097689341605894e-05, "loss": 1.464, "step": 22963 }, { "epoch": 0.8223897434060916, "grad_norm": 1.335502028465271, "learning_rate": 1.6091378931934474e-05, "loss": 1.3216, "step": 22964 }, { "epoch": 0.8224255555356599, "grad_norm": 1.5332695245742798, "learning_rate": 1.6085069651145334e-05, "loss": 1.3925, "step": 22965 }, { "epoch": 0.8224613676652283, "grad_norm": 1.4483823776245117, "learning_rate": 1.6078761499323326e-05, "loss": 1.5104, "step": 22966 }, { "epoch": 0.8224971797947965, "grad_norm": 1.384883999824524, "learning_rate": 1.6072454476553357e-05, "loss": 1.522, "step": 22967 }, { "epoch": 0.8225329919243648, "grad_norm": 1.8154127597808838, "learning_rate": 1.6066148582920237e-05, "loss": 1.4938, "step": 22968 }, { "epoch": 0.8225688040539331, "grad_norm": 1.559505581855774, "learning_rate": 1.6059843818508814e-05, "loss": 1.2616, "step": 22969 }, { "epoch": 0.8226046161835013, "grad_norm": 1.9433817863464355, "learning_rate": 1.605354018340395e-05, "loss": 1.1542, "step": 22970 }, { "epoch": 0.8226404283130696, "grad_norm": 1.6815561056137085, "learning_rate": 1.6047237677690386e-05, "loss": 1.3772, "step": 22971 }, { "epoch": 0.8226762404426379, "grad_norm": 1.572665810585022, "learning_rate": 1.6040936301452957e-05, "loss": 1.7138, "step": 22972 }, { "epoch": 0.8227120525722063, "grad_norm": 1.9919036626815796, "learning_rate": 1.603463605477643e-05, "loss": 1.4052, "step": 22973 }, { "epoch": 0.8227478647017745, "grad_norm": 1.5342936515808105, "learning_rate": 1.602833693774558e-05, "loss": 1.0727, "step": 22974 }, { "epoch": 0.8227836768313428, "grad_norm": 1.8794121742248535, "learning_rate": 1.6022038950445127e-05, "loss": 1.6559, "step": 22975 }, { "epoch": 0.8228194889609111, "grad_norm": 1.6604645252227783, "learning_rate": 1.6015742092959818e-05, "loss": 1.4897, "step": 22976 }, { "epoch": 0.8228553010904793, "grad_norm": 1.3487586975097656, "learning_rate": 1.6009446365374383e-05, "loss": 1.3302, "step": 22977 }, { "epoch": 0.8228911132200476, "grad_norm": 1.6160697937011719, "learning_rate": 1.6003151767773485e-05, "loss": 1.1905, "step": 22978 }, { "epoch": 0.8229269253496159, "grad_norm": 2.0315003395080566, "learning_rate": 1.5996858300241834e-05, "loss": 1.4471, "step": 22979 }, { "epoch": 0.8229627374791842, "grad_norm": 1.3102805614471436, "learning_rate": 1.5990565962864103e-05, "loss": 1.4466, "step": 22980 }, { "epoch": 0.8229985496087525, "grad_norm": 1.7477620840072632, "learning_rate": 1.5984274755724958e-05, "loss": 1.5908, "step": 22981 }, { "epoch": 0.8230343617383208, "grad_norm": 1.6393771171569824, "learning_rate": 1.5977984678909008e-05, "loss": 1.3496, "step": 22982 }, { "epoch": 0.823070173867889, "grad_norm": 2.298346519470215, "learning_rate": 1.597169573250089e-05, "loss": 1.623, "step": 22983 }, { "epoch": 0.8231059859974573, "grad_norm": 1.93317711353302, "learning_rate": 1.5965407916585208e-05, "loss": 1.3362, "step": 22984 }, { "epoch": 0.8231417981270256, "grad_norm": 1.3797252178192139, "learning_rate": 1.59591212312466e-05, "loss": 1.1094, "step": 22985 }, { "epoch": 0.8231776102565939, "grad_norm": 1.945043921470642, "learning_rate": 1.595283567656959e-05, "loss": 1.5189, "step": 22986 }, { "epoch": 0.8232134223861622, "grad_norm": 1.6652638912200928, "learning_rate": 1.5946551252638754e-05, "loss": 1.728, "step": 22987 }, { "epoch": 0.8232492345157305, "grad_norm": 1.7131400108337402, "learning_rate": 1.594026795953868e-05, "loss": 1.1332, "step": 22988 }, { "epoch": 0.8232850466452988, "grad_norm": 1.8453240394592285, "learning_rate": 1.5933985797353844e-05, "loss": 1.5197, "step": 22989 }, { "epoch": 0.823320858774867, "grad_norm": 2.4557993412017822, "learning_rate": 1.5927704766168793e-05, "loss": 1.5135, "step": 22990 }, { "epoch": 0.8233566709044353, "grad_norm": 1.5093895196914673, "learning_rate": 1.5921424866068026e-05, "loss": 1.7071, "step": 22991 }, { "epoch": 0.8233924830340036, "grad_norm": 1.661433219909668, "learning_rate": 1.5915146097136056e-05, "loss": 1.4739, "step": 22992 }, { "epoch": 0.8234282951635719, "grad_norm": 1.4338335990905762, "learning_rate": 1.5908868459457317e-05, "loss": 1.4, "step": 22993 }, { "epoch": 0.8234641072931402, "grad_norm": 1.3473552465438843, "learning_rate": 1.5902591953116287e-05, "loss": 0.9404, "step": 22994 }, { "epoch": 0.8234999194227085, "grad_norm": 1.7865482568740845, "learning_rate": 1.589631657819741e-05, "loss": 1.5638, "step": 22995 }, { "epoch": 0.8235357315522768, "grad_norm": 1.594167709350586, "learning_rate": 1.5890042334785104e-05, "loss": 1.2829, "step": 22996 }, { "epoch": 0.823571543681845, "grad_norm": 1.8517452478408813, "learning_rate": 1.5883769222963775e-05, "loss": 1.4047, "step": 22997 }, { "epoch": 0.8236073558114133, "grad_norm": 1.841001033782959, "learning_rate": 1.587749724281783e-05, "loss": 1.166, "step": 22998 }, { "epoch": 0.8236431679409816, "grad_norm": 1.69795823097229, "learning_rate": 1.5871226394431672e-05, "loss": 1.6688, "step": 22999 }, { "epoch": 0.8236789800705498, "grad_norm": 1.5626368522644043, "learning_rate": 1.586495667788962e-05, "loss": 1.3707, "step": 23000 }, { "epoch": 0.8237147922001182, "grad_norm": 1.3981742858886719, "learning_rate": 1.5858688093276042e-05, "loss": 1.0959, "step": 23001 }, { "epoch": 0.8237506043296865, "grad_norm": 2.0931923389434814, "learning_rate": 1.5852420640675313e-05, "loss": 1.35, "step": 23002 }, { "epoch": 0.8237864164592548, "grad_norm": 2.05471134185791, "learning_rate": 1.5846154320171703e-05, "loss": 1.7002, "step": 23003 }, { "epoch": 0.823822228588823, "grad_norm": 1.5984715223312378, "learning_rate": 1.583988913184953e-05, "loss": 1.4799, "step": 23004 }, { "epoch": 0.8238580407183913, "grad_norm": 1.48750638961792, "learning_rate": 1.583362507579309e-05, "loss": 1.209, "step": 23005 }, { "epoch": 0.8238938528479596, "grad_norm": 1.6805229187011719, "learning_rate": 1.582736215208669e-05, "loss": 1.3504, "step": 23006 }, { "epoch": 0.8239296649775278, "grad_norm": 1.4237467050552368, "learning_rate": 1.582110036081452e-05, "loss": 1.0545, "step": 23007 }, { "epoch": 0.8239654771070962, "grad_norm": 1.6374319791793823, "learning_rate": 1.581483970206087e-05, "loss": 1.4251, "step": 23008 }, { "epoch": 0.8240012892366645, "grad_norm": 1.6855441331863403, "learning_rate": 1.580858017590996e-05, "loss": 1.3527, "step": 23009 }, { "epoch": 0.8240371013662328, "grad_norm": 1.4074074029922485, "learning_rate": 1.5802321782446028e-05, "loss": 1.3838, "step": 23010 }, { "epoch": 0.824072913495801, "grad_norm": 1.5578721761703491, "learning_rate": 1.5796064521753252e-05, "loss": 1.4217, "step": 23011 }, { "epoch": 0.8241087256253693, "grad_norm": 1.4159759283065796, "learning_rate": 1.5789808393915763e-05, "loss": 1.5785, "step": 23012 }, { "epoch": 0.8241445377549376, "grad_norm": 1.645815372467041, "learning_rate": 1.5783553399017825e-05, "loss": 1.3073, "step": 23013 }, { "epoch": 0.8241803498845058, "grad_norm": 1.8680726289749146, "learning_rate": 1.577729953714352e-05, "loss": 1.4468, "step": 23014 }, { "epoch": 0.8242161620140742, "grad_norm": 1.9843326807022095, "learning_rate": 1.577104680837703e-05, "loss": 1.527, "step": 23015 }, { "epoch": 0.8242519741436425, "grad_norm": 1.7631583213806152, "learning_rate": 1.576479521280242e-05, "loss": 1.6603, "step": 23016 }, { "epoch": 0.8242877862732108, "grad_norm": 1.3905339241027832, "learning_rate": 1.575854475050388e-05, "loss": 1.0263, "step": 23017 }, { "epoch": 0.824323598402779, "grad_norm": 1.588399887084961, "learning_rate": 1.5752295421565423e-05, "loss": 1.5257, "step": 23018 }, { "epoch": 0.8243594105323473, "grad_norm": 2.075728178024292, "learning_rate": 1.574604722607117e-05, "loss": 1.4954, "step": 23019 }, { "epoch": 0.8243952226619156, "grad_norm": 1.729669213294983, "learning_rate": 1.573980016410519e-05, "loss": 1.5427, "step": 23020 }, { "epoch": 0.8244310347914838, "grad_norm": 1.457468867301941, "learning_rate": 1.573355423575149e-05, "loss": 1.5744, "step": 23021 }, { "epoch": 0.8244668469210522, "grad_norm": 2.133328676223755, "learning_rate": 1.572730944109415e-05, "loss": 1.3736, "step": 23022 }, { "epoch": 0.8245026590506205, "grad_norm": 1.4043382406234741, "learning_rate": 1.5721065780217103e-05, "loss": 1.4783, "step": 23023 }, { "epoch": 0.8245384711801887, "grad_norm": 2.212798833847046, "learning_rate": 1.5714823253204447e-05, "loss": 1.6248, "step": 23024 }, { "epoch": 0.824574283309757, "grad_norm": 1.4132270812988281, "learning_rate": 1.5708581860140113e-05, "loss": 1.2569, "step": 23025 }, { "epoch": 0.8246100954393253, "grad_norm": 1.8781002759933472, "learning_rate": 1.5702341601108094e-05, "loss": 1.5612, "step": 23026 }, { "epoch": 0.8246459075688936, "grad_norm": 1.9271550178527832, "learning_rate": 1.56961024761923e-05, "loss": 1.4968, "step": 23027 }, { "epoch": 0.8246817196984618, "grad_norm": 1.5880156755447388, "learning_rate": 1.5689864485476736e-05, "loss": 1.3802, "step": 23028 }, { "epoch": 0.8247175318280302, "grad_norm": 1.7311402559280396, "learning_rate": 1.5683627629045295e-05, "loss": 1.5533, "step": 23029 }, { "epoch": 0.8247533439575985, "grad_norm": 1.6801574230194092, "learning_rate": 1.5677391906981842e-05, "loss": 1.268, "step": 23030 }, { "epoch": 0.8247891560871667, "grad_norm": 1.9536669254302979, "learning_rate": 1.5671157319370357e-05, "loss": 1.4332, "step": 23031 }, { "epoch": 0.824824968216735, "grad_norm": 1.3697175979614258, "learning_rate": 1.5664923866294655e-05, "loss": 1.2412, "step": 23032 }, { "epoch": 0.8248607803463033, "grad_norm": 1.6064835786819458, "learning_rate": 1.565869154783863e-05, "loss": 1.5359, "step": 23033 }, { "epoch": 0.8248965924758715, "grad_norm": 1.9790090322494507, "learning_rate": 1.5652460364086084e-05, "loss": 1.4676, "step": 23034 }, { "epoch": 0.8249324046054398, "grad_norm": 1.8181332349777222, "learning_rate": 1.5646230315120923e-05, "loss": 1.3089, "step": 23035 }, { "epoch": 0.8249682167350082, "grad_norm": 1.756899356842041, "learning_rate": 1.5640001401026904e-05, "loss": 1.4869, "step": 23036 }, { "epoch": 0.8250040288645765, "grad_norm": 1.8076976537704468, "learning_rate": 1.5633773621887872e-05, "loss": 1.1611, "step": 23037 }, { "epoch": 0.8250398409941447, "grad_norm": 1.5319172143936157, "learning_rate": 1.5627546977787565e-05, "loss": 1.4351, "step": 23038 }, { "epoch": 0.825075653123713, "grad_norm": 1.5525784492492676, "learning_rate": 1.5621321468809778e-05, "loss": 1.4875, "step": 23039 }, { "epoch": 0.8251114652532813, "grad_norm": 2.0940845012664795, "learning_rate": 1.56150970950383e-05, "loss": 1.1187, "step": 23040 }, { "epoch": 0.8251472773828495, "grad_norm": 1.537346601486206, "learning_rate": 1.5608873856556828e-05, "loss": 1.3419, "step": 23041 }, { "epoch": 0.8251830895124178, "grad_norm": 1.681191325187683, "learning_rate": 1.5602651753449083e-05, "loss": 1.2589, "step": 23042 }, { "epoch": 0.8252189016419862, "grad_norm": 1.7197188138961792, "learning_rate": 1.5596430785798798e-05, "loss": 1.5691, "step": 23043 }, { "epoch": 0.8252547137715545, "grad_norm": 1.6822034120559692, "learning_rate": 1.55902109536897e-05, "loss": 1.2887, "step": 23044 }, { "epoch": 0.8252905259011227, "grad_norm": 2.1965887546539307, "learning_rate": 1.558399225720537e-05, "loss": 1.1151, "step": 23045 }, { "epoch": 0.825326338030691, "grad_norm": 1.4301897287368774, "learning_rate": 1.5577774696429592e-05, "loss": 1.5266, "step": 23046 }, { "epoch": 0.8253621501602593, "grad_norm": 1.6066406965255737, "learning_rate": 1.5571558271445952e-05, "loss": 1.5269, "step": 23047 }, { "epoch": 0.8253979622898275, "grad_norm": 1.7901389598846436, "learning_rate": 1.556534298233807e-05, "loss": 1.2883, "step": 23048 }, { "epoch": 0.8254337744193958, "grad_norm": 1.3142569065093994, "learning_rate": 1.5559128829189597e-05, "loss": 1.6844, "step": 23049 }, { "epoch": 0.8254695865489642, "grad_norm": 1.5939184427261353, "learning_rate": 1.5552915812084113e-05, "loss": 1.3867, "step": 23050 }, { "epoch": 0.8255053986785325, "grad_norm": 2.42671275138855, "learning_rate": 1.5546703931105233e-05, "loss": 1.5441, "step": 23051 }, { "epoch": 0.8255412108081007, "grad_norm": 2.1185123920440674, "learning_rate": 1.5540493186336503e-05, "loss": 1.5006, "step": 23052 }, { "epoch": 0.825577022937669, "grad_norm": 1.4602375030517578, "learning_rate": 1.5534283577861497e-05, "loss": 1.3002, "step": 23053 }, { "epoch": 0.8256128350672373, "grad_norm": 1.9092152118682861, "learning_rate": 1.552807510576374e-05, "loss": 1.5135, "step": 23054 }, { "epoch": 0.8256486471968055, "grad_norm": 2.400681734085083, "learning_rate": 1.5521867770126795e-05, "loss": 1.466, "step": 23055 }, { "epoch": 0.8256844593263738, "grad_norm": 1.5862021446228027, "learning_rate": 1.5515661571034134e-05, "loss": 1.2344, "step": 23056 }, { "epoch": 0.8257202714559422, "grad_norm": 2.3739986419677734, "learning_rate": 1.5509456508569275e-05, "loss": 1.4641, "step": 23057 }, { "epoch": 0.8257560835855104, "grad_norm": 1.941270351409912, "learning_rate": 1.5503252582815707e-05, "loss": 1.5392, "step": 23058 }, { "epoch": 0.8257918957150787, "grad_norm": 1.816595435142517, "learning_rate": 1.5497049793856868e-05, "loss": 1.1223, "step": 23059 }, { "epoch": 0.825827707844647, "grad_norm": 1.351844072341919, "learning_rate": 1.5490848141776214e-05, "loss": 1.1367, "step": 23060 }, { "epoch": 0.8258635199742153, "grad_norm": 1.4327374696731567, "learning_rate": 1.548464762665719e-05, "loss": 1.3438, "step": 23061 }, { "epoch": 0.8258993321037835, "grad_norm": 1.515933632850647, "learning_rate": 1.5478448248583244e-05, "loss": 1.2234, "step": 23062 }, { "epoch": 0.8259351442333518, "grad_norm": 1.2966159582138062, "learning_rate": 1.5472250007637724e-05, "loss": 1.4408, "step": 23063 }, { "epoch": 0.8259709563629202, "grad_norm": 1.9407875537872314, "learning_rate": 1.546605290390405e-05, "loss": 1.5882, "step": 23064 }, { "epoch": 0.8260067684924884, "grad_norm": 2.346935749053955, "learning_rate": 1.545985693746561e-05, "loss": 1.293, "step": 23065 }, { "epoch": 0.8260425806220567, "grad_norm": 1.5052082538604736, "learning_rate": 1.545366210840573e-05, "loss": 1.3619, "step": 23066 }, { "epoch": 0.826078392751625, "grad_norm": 1.547210693359375, "learning_rate": 1.5447468416807766e-05, "loss": 1.5157, "step": 23067 }, { "epoch": 0.8261142048811932, "grad_norm": 1.6457045078277588, "learning_rate": 1.5441275862755043e-05, "loss": 1.1963, "step": 23068 }, { "epoch": 0.8261500170107615, "grad_norm": 1.628400206565857, "learning_rate": 1.5435084446330917e-05, "loss": 1.6531, "step": 23069 }, { "epoch": 0.8261858291403298, "grad_norm": 1.6187325716018677, "learning_rate": 1.5428894167618622e-05, "loss": 1.6925, "step": 23070 }, { "epoch": 0.8262216412698982, "grad_norm": 1.649355411529541, "learning_rate": 1.5422705026701468e-05, "loss": 1.3264, "step": 23071 }, { "epoch": 0.8262574533994664, "grad_norm": 1.924761176109314, "learning_rate": 1.5416517023662713e-05, "loss": 1.6002, "step": 23072 }, { "epoch": 0.8262932655290347, "grad_norm": 1.5182862281799316, "learning_rate": 1.541033015858565e-05, "loss": 1.3946, "step": 23073 }, { "epoch": 0.826329077658603, "grad_norm": 1.9584239721298218, "learning_rate": 1.540414443155345e-05, "loss": 1.4373, "step": 23074 }, { "epoch": 0.8263648897881712, "grad_norm": 1.620587706565857, "learning_rate": 1.5397959842649367e-05, "loss": 1.5348, "step": 23075 }, { "epoch": 0.8264007019177395, "grad_norm": 1.8111913204193115, "learning_rate": 1.5391776391956638e-05, "loss": 1.8022, "step": 23076 }, { "epoch": 0.8264365140473078, "grad_norm": 1.7288938760757446, "learning_rate": 1.5385594079558387e-05, "loss": 1.4836, "step": 23077 }, { "epoch": 0.8264723261768762, "grad_norm": 1.5004984140396118, "learning_rate": 1.5379412905537828e-05, "loss": 1.4875, "step": 23078 }, { "epoch": 0.8265081383064444, "grad_norm": 2.3542582988739014, "learning_rate": 1.5373232869978116e-05, "loss": 1.5091, "step": 23079 }, { "epoch": 0.8265439504360127, "grad_norm": 1.4314318895339966, "learning_rate": 1.5367053972962408e-05, "loss": 1.3066, "step": 23080 }, { "epoch": 0.826579762565581, "grad_norm": 1.7067185640335083, "learning_rate": 1.5360876214573806e-05, "loss": 1.3752, "step": 23081 }, { "epoch": 0.8266155746951492, "grad_norm": 1.7711745500564575, "learning_rate": 1.5354699594895438e-05, "loss": 1.4336, "step": 23082 }, { "epoch": 0.8266513868247175, "grad_norm": 2.183694839477539, "learning_rate": 1.534852411401043e-05, "loss": 1.2257, "step": 23083 }, { "epoch": 0.8266871989542858, "grad_norm": 1.6529567241668701, "learning_rate": 1.5342349772001808e-05, "loss": 1.4126, "step": 23084 }, { "epoch": 0.8267230110838542, "grad_norm": 2.0304243564605713, "learning_rate": 1.5336176568952666e-05, "loss": 1.3902, "step": 23085 }, { "epoch": 0.8267588232134224, "grad_norm": 1.541917324066162, "learning_rate": 1.5330004504946072e-05, "loss": 1.6367, "step": 23086 }, { "epoch": 0.8267946353429907, "grad_norm": 1.8716808557510376, "learning_rate": 1.532383358006506e-05, "loss": 1.4428, "step": 23087 }, { "epoch": 0.826830447472559, "grad_norm": 1.713232398033142, "learning_rate": 1.5317663794392634e-05, "loss": 1.2549, "step": 23088 }, { "epoch": 0.8268662596021272, "grad_norm": 1.4402745962142944, "learning_rate": 1.53114951480118e-05, "loss": 1.3368, "step": 23089 }, { "epoch": 0.8269020717316955, "grad_norm": 2.111379384994507, "learning_rate": 1.5305327641005584e-05, "loss": 1.5311, "step": 23090 }, { "epoch": 0.8269378838612638, "grad_norm": 1.7146985530853271, "learning_rate": 1.5299161273456907e-05, "loss": 1.5118, "step": 23091 }, { "epoch": 0.8269736959908321, "grad_norm": 1.5040332078933716, "learning_rate": 1.529299604544876e-05, "loss": 1.0758, "step": 23092 }, { "epoch": 0.8270095081204004, "grad_norm": 1.687289834022522, "learning_rate": 1.5286831957064095e-05, "loss": 1.294, "step": 23093 }, { "epoch": 0.8270453202499687, "grad_norm": 1.5885672569274902, "learning_rate": 1.528066900838585e-05, "loss": 1.3912, "step": 23094 }, { "epoch": 0.827081132379537, "grad_norm": 1.3895875215530396, "learning_rate": 1.5274507199496913e-05, "loss": 1.2929, "step": 23095 }, { "epoch": 0.8271169445091052, "grad_norm": 1.6085909605026245, "learning_rate": 1.526834653048018e-05, "loss": 1.3754, "step": 23096 }, { "epoch": 0.8271527566386735, "grad_norm": 1.6051825284957886, "learning_rate": 1.526218700141855e-05, "loss": 1.41, "step": 23097 }, { "epoch": 0.8271885687682418, "grad_norm": 1.4668638706207275, "learning_rate": 1.5256028612394913e-05, "loss": 1.2962, "step": 23098 }, { "epoch": 0.8272243808978101, "grad_norm": 1.2707617282867432, "learning_rate": 1.5249871363492107e-05, "loss": 1.3112, "step": 23099 }, { "epoch": 0.8272601930273784, "grad_norm": 1.6298561096191406, "learning_rate": 1.5243715254792912e-05, "loss": 1.605, "step": 23100 }, { "epoch": 0.8272960051569467, "grad_norm": 1.4971948862075806, "learning_rate": 1.5237560286380247e-05, "loss": 1.1962, "step": 23101 }, { "epoch": 0.827331817286515, "grad_norm": 1.3633439540863037, "learning_rate": 1.5231406458336839e-05, "loss": 1.4394, "step": 23102 }, { "epoch": 0.8273676294160832, "grad_norm": 1.8583279848098755, "learning_rate": 1.5225253770745529e-05, "loss": 1.6863, "step": 23103 }, { "epoch": 0.8274034415456515, "grad_norm": 1.6401985883712769, "learning_rate": 1.5219102223689074e-05, "loss": 1.5851, "step": 23104 }, { "epoch": 0.8274392536752198, "grad_norm": 1.8377301692962646, "learning_rate": 1.5212951817250253e-05, "loss": 1.2312, "step": 23105 }, { "epoch": 0.8274750658047881, "grad_norm": 1.7044795751571655, "learning_rate": 1.5206802551511778e-05, "loss": 1.8088, "step": 23106 }, { "epoch": 0.8275108779343564, "grad_norm": 1.5256201028823853, "learning_rate": 1.5200654426556405e-05, "loss": 1.4694, "step": 23107 }, { "epoch": 0.8275466900639247, "grad_norm": 1.6948037147521973, "learning_rate": 1.5194507442466865e-05, "loss": 1.3478, "step": 23108 }, { "epoch": 0.8275825021934929, "grad_norm": 1.5120340585708618, "learning_rate": 1.5188361599325817e-05, "loss": 1.3614, "step": 23109 }, { "epoch": 0.8276183143230612, "grad_norm": 1.5714203119277954, "learning_rate": 1.5182216897215984e-05, "loss": 1.6772, "step": 23110 }, { "epoch": 0.8276541264526295, "grad_norm": 1.667129397392273, "learning_rate": 1.5176073336219965e-05, "loss": 1.205, "step": 23111 }, { "epoch": 0.8276899385821977, "grad_norm": 1.733232855796814, "learning_rate": 1.5169930916420516e-05, "loss": 1.3543, "step": 23112 }, { "epoch": 0.8277257507117661, "grad_norm": 1.5604608058929443, "learning_rate": 1.5163789637900194e-05, "loss": 1.5258, "step": 23113 }, { "epoch": 0.8277615628413344, "grad_norm": 1.5304710865020752, "learning_rate": 1.5157649500741678e-05, "loss": 1.4189, "step": 23114 }, { "epoch": 0.8277973749709027, "grad_norm": 2.146677017211914, "learning_rate": 1.5151510505027499e-05, "loss": 1.3662, "step": 23115 }, { "epoch": 0.8278331871004709, "grad_norm": 2.4017913341522217, "learning_rate": 1.5145372650840361e-05, "loss": 1.4622, "step": 23116 }, { "epoch": 0.8278689992300392, "grad_norm": 1.358140468597412, "learning_rate": 1.5139235938262763e-05, "loss": 1.4776, "step": 23117 }, { "epoch": 0.8279048113596075, "grad_norm": 2.4922754764556885, "learning_rate": 1.513310036737724e-05, "loss": 1.7687, "step": 23118 }, { "epoch": 0.8279406234891757, "grad_norm": 1.3777458667755127, "learning_rate": 1.5126965938266436e-05, "loss": 1.504, "step": 23119 }, { "epoch": 0.8279764356187441, "grad_norm": 1.5722744464874268, "learning_rate": 1.5120832651012795e-05, "loss": 1.3428, "step": 23120 }, { "epoch": 0.8280122477483124, "grad_norm": 2.008540153503418, "learning_rate": 1.5114700505698886e-05, "loss": 1.3799, "step": 23121 }, { "epoch": 0.8280480598778807, "grad_norm": 1.4039058685302734, "learning_rate": 1.5108569502407155e-05, "loss": 1.3482, "step": 23122 }, { "epoch": 0.8280838720074489, "grad_norm": 1.4145723581314087, "learning_rate": 1.5102439641220156e-05, "loss": 1.3316, "step": 23123 }, { "epoch": 0.8281196841370172, "grad_norm": 1.5959590673446655, "learning_rate": 1.5096310922220291e-05, "loss": 1.7295, "step": 23124 }, { "epoch": 0.8281554962665855, "grad_norm": 1.5081450939178467, "learning_rate": 1.5090183345490084e-05, "loss": 1.2043, "step": 23125 }, { "epoch": 0.8281913083961537, "grad_norm": 1.61405348777771, "learning_rate": 1.50840569111119e-05, "loss": 1.5296, "step": 23126 }, { "epoch": 0.8282271205257221, "grad_norm": 2.1915838718414307, "learning_rate": 1.5077931619168196e-05, "loss": 1.2124, "step": 23127 }, { "epoch": 0.8282629326552904, "grad_norm": 1.5725104808807373, "learning_rate": 1.5071807469741406e-05, "loss": 1.4422, "step": 23128 }, { "epoch": 0.8282987447848587, "grad_norm": 1.9400559663772583, "learning_rate": 1.5065684462913853e-05, "loss": 1.4408, "step": 23129 }, { "epoch": 0.8283345569144269, "grad_norm": 1.7734508514404297, "learning_rate": 1.5059562598768007e-05, "loss": 1.6698, "step": 23130 }, { "epoch": 0.8283703690439952, "grad_norm": 1.7858387231826782, "learning_rate": 1.5053441877386154e-05, "loss": 1.4474, "step": 23131 }, { "epoch": 0.8284061811735635, "grad_norm": 1.9878989458084106, "learning_rate": 1.5047322298850685e-05, "loss": 1.1392, "step": 23132 }, { "epoch": 0.8284419933031317, "grad_norm": 1.5624104738235474, "learning_rate": 1.504120386324387e-05, "loss": 1.5743, "step": 23133 }, { "epoch": 0.8284778054327001, "grad_norm": 1.580743432044983, "learning_rate": 1.5035086570648115e-05, "loss": 1.2758, "step": 23134 }, { "epoch": 0.8285136175622684, "grad_norm": 1.7219854593276978, "learning_rate": 1.5028970421145684e-05, "loss": 1.2629, "step": 23135 }, { "epoch": 0.8285494296918366, "grad_norm": 1.9313101768493652, "learning_rate": 1.5022855414818816e-05, "loss": 1.3722, "step": 23136 }, { "epoch": 0.8285852418214049, "grad_norm": 1.6862903833389282, "learning_rate": 1.5016741551749813e-05, "loss": 1.4906, "step": 23137 }, { "epoch": 0.8286210539509732, "grad_norm": 2.225872039794922, "learning_rate": 1.5010628832020945e-05, "loss": 1.6626, "step": 23138 }, { "epoch": 0.8286568660805415, "grad_norm": 1.539825201034546, "learning_rate": 1.5004517255714456e-05, "loss": 1.7301, "step": 23139 }, { "epoch": 0.8286926782101097, "grad_norm": 1.5023062229156494, "learning_rate": 1.4998406822912525e-05, "loss": 1.6564, "step": 23140 }, { "epoch": 0.8287284903396781, "grad_norm": 2.535398483276367, "learning_rate": 1.4992297533697387e-05, "loss": 1.4908, "step": 23141 }, { "epoch": 0.8287643024692464, "grad_norm": 1.7443580627441406, "learning_rate": 1.4986189388151229e-05, "loss": 1.3793, "step": 23142 }, { "epoch": 0.8288001145988146, "grad_norm": 1.728061318397522, "learning_rate": 1.4980082386356264e-05, "loss": 1.246, "step": 23143 }, { "epoch": 0.8288359267283829, "grad_norm": 1.9140900373458862, "learning_rate": 1.4973976528394596e-05, "loss": 1.2107, "step": 23144 }, { "epoch": 0.8288717388579512, "grad_norm": 2.0008881092071533, "learning_rate": 1.4967871814348399e-05, "loss": 1.4527, "step": 23145 }, { "epoch": 0.8289075509875194, "grad_norm": 1.3603802919387817, "learning_rate": 1.4961768244299823e-05, "loss": 1.2612, "step": 23146 }, { "epoch": 0.8289433631170877, "grad_norm": 1.8144201040267944, "learning_rate": 1.4955665818330944e-05, "loss": 1.5788, "step": 23147 }, { "epoch": 0.8289791752466561, "grad_norm": 1.1382317543029785, "learning_rate": 1.4949564536523874e-05, "loss": 1.1458, "step": 23148 }, { "epoch": 0.8290149873762244, "grad_norm": 1.77524995803833, "learning_rate": 1.4943464398960716e-05, "loss": 1.6568, "step": 23149 }, { "epoch": 0.8290507995057926, "grad_norm": 1.7425357103347778, "learning_rate": 1.4937365405723547e-05, "loss": 1.3626, "step": 23150 }, { "epoch": 0.8290866116353609, "grad_norm": 1.9161444902420044, "learning_rate": 1.493126755689439e-05, "loss": 1.4522, "step": 23151 }, { "epoch": 0.8291224237649292, "grad_norm": 1.7542110681533813, "learning_rate": 1.4925170852555282e-05, "loss": 1.3236, "step": 23152 }, { "epoch": 0.8291582358944974, "grad_norm": 1.5818971395492554, "learning_rate": 1.4919075292788298e-05, "loss": 1.3392, "step": 23153 }, { "epoch": 0.8291940480240657, "grad_norm": 1.3822681903839111, "learning_rate": 1.4912980877675387e-05, "loss": 1.3119, "step": 23154 }, { "epoch": 0.8292298601536341, "grad_norm": 1.6355019807815552, "learning_rate": 1.4906887607298548e-05, "loss": 1.9485, "step": 23155 }, { "epoch": 0.8292656722832024, "grad_norm": 1.661881685256958, "learning_rate": 1.4900795481739793e-05, "loss": 1.6326, "step": 23156 }, { "epoch": 0.8293014844127706, "grad_norm": 1.375794768333435, "learning_rate": 1.4894704501081069e-05, "loss": 1.4931, "step": 23157 }, { "epoch": 0.8293372965423389, "grad_norm": 1.861142873764038, "learning_rate": 1.488861466540431e-05, "loss": 1.466, "step": 23158 }, { "epoch": 0.8293731086719072, "grad_norm": 1.3190573453903198, "learning_rate": 1.488252597479145e-05, "loss": 1.3289, "step": 23159 }, { "epoch": 0.8294089208014754, "grad_norm": 1.687562108039856, "learning_rate": 1.4876438429324414e-05, "loss": 1.1553, "step": 23160 }, { "epoch": 0.8294447329310437, "grad_norm": 1.8980015516281128, "learning_rate": 1.487035202908511e-05, "loss": 1.4203, "step": 23161 }, { "epoch": 0.8294805450606121, "grad_norm": 2.008462905883789, "learning_rate": 1.4864266774155389e-05, "loss": 1.3749, "step": 23162 }, { "epoch": 0.8295163571901804, "grad_norm": 1.576193928718567, "learning_rate": 1.4858182664617148e-05, "loss": 1.2269, "step": 23163 }, { "epoch": 0.8295521693197486, "grad_norm": 1.642438530921936, "learning_rate": 1.4852099700552259e-05, "loss": 1.38, "step": 23164 }, { "epoch": 0.8295879814493169, "grad_norm": 1.7365227937698364, "learning_rate": 1.4846017882042506e-05, "loss": 1.4031, "step": 23165 }, { "epoch": 0.8296237935788852, "grad_norm": 1.6109135150909424, "learning_rate": 1.4839937209169741e-05, "loss": 1.2866, "step": 23166 }, { "epoch": 0.8296596057084534, "grad_norm": 1.110206127166748, "learning_rate": 1.4833857682015773e-05, "loss": 1.3672, "step": 23167 }, { "epoch": 0.8296954178380217, "grad_norm": 1.5050301551818848, "learning_rate": 1.4827779300662425e-05, "loss": 1.4046, "step": 23168 }, { "epoch": 0.8297312299675901, "grad_norm": 1.5726349353790283, "learning_rate": 1.4821702065191413e-05, "loss": 1.3428, "step": 23169 }, { "epoch": 0.8297670420971583, "grad_norm": 1.4734699726104736, "learning_rate": 1.4815625975684522e-05, "loss": 1.3572, "step": 23170 }, { "epoch": 0.8298028542267266, "grad_norm": 1.7343571186065674, "learning_rate": 1.4809551032223534e-05, "loss": 1.4396, "step": 23171 }, { "epoch": 0.8298386663562949, "grad_norm": 1.371505618095398, "learning_rate": 1.480347723489013e-05, "loss": 1.4953, "step": 23172 }, { "epoch": 0.8298744784858632, "grad_norm": 1.4669415950775146, "learning_rate": 1.4797404583766028e-05, "loss": 0.9984, "step": 23173 }, { "epoch": 0.8299102906154314, "grad_norm": 1.7243847846984863, "learning_rate": 1.4791333078932956e-05, "loss": 1.4721, "step": 23174 }, { "epoch": 0.8299461027449997, "grad_norm": 1.569770097732544, "learning_rate": 1.4785262720472615e-05, "loss": 1.4345, "step": 23175 }, { "epoch": 0.829981914874568, "grad_norm": 1.4598844051361084, "learning_rate": 1.4779193508466604e-05, "loss": 1.3252, "step": 23176 }, { "epoch": 0.8300177270041363, "grad_norm": 1.655402660369873, "learning_rate": 1.4773125442996626e-05, "loss": 1.622, "step": 23177 }, { "epoch": 0.8300535391337046, "grad_norm": 1.8673683404922485, "learning_rate": 1.4767058524144318e-05, "loss": 1.3777, "step": 23178 }, { "epoch": 0.8300893512632729, "grad_norm": 1.8644596338272095, "learning_rate": 1.476099275199131e-05, "loss": 1.4903, "step": 23179 }, { "epoch": 0.8301251633928411, "grad_norm": 1.84309720993042, "learning_rate": 1.4754928126619172e-05, "loss": 1.3268, "step": 23180 }, { "epoch": 0.8301609755224094, "grad_norm": 1.785307765007019, "learning_rate": 1.4748864648109518e-05, "loss": 1.3626, "step": 23181 }, { "epoch": 0.8301967876519777, "grad_norm": 1.6583218574523926, "learning_rate": 1.4742802316543947e-05, "loss": 1.3632, "step": 23182 }, { "epoch": 0.830232599781546, "grad_norm": 2.204003095626831, "learning_rate": 1.4736741132003984e-05, "loss": 1.4275, "step": 23183 }, { "epoch": 0.8302684119111143, "grad_norm": 1.5974963903427124, "learning_rate": 1.4730681094571175e-05, "loss": 1.5503, "step": 23184 }, { "epoch": 0.8303042240406826, "grad_norm": 1.756859540939331, "learning_rate": 1.4724622204327066e-05, "loss": 1.56, "step": 23185 }, { "epoch": 0.8303400361702509, "grad_norm": 1.429917335510254, "learning_rate": 1.471856446135319e-05, "loss": 1.5715, "step": 23186 }, { "epoch": 0.8303758482998191, "grad_norm": 1.7499074935913086, "learning_rate": 1.4712507865730996e-05, "loss": 1.5858, "step": 23187 }, { "epoch": 0.8304116604293874, "grad_norm": 1.723129153251648, "learning_rate": 1.4706452417542006e-05, "loss": 1.4264, "step": 23188 }, { "epoch": 0.8304474725589557, "grad_norm": 1.859376311302185, "learning_rate": 1.4700398116867697e-05, "loss": 1.3564, "step": 23189 }, { "epoch": 0.830483284688524, "grad_norm": 1.5590065717697144, "learning_rate": 1.4694344963789474e-05, "loss": 1.5782, "step": 23190 }, { "epoch": 0.8305190968180923, "grad_norm": 1.856689214706421, "learning_rate": 1.4688292958388816e-05, "loss": 1.1994, "step": 23191 }, { "epoch": 0.8305549089476606, "grad_norm": 1.3962154388427734, "learning_rate": 1.4682242100747123e-05, "loss": 1.4828, "step": 23192 }, { "epoch": 0.8305907210772289, "grad_norm": 1.6056325435638428, "learning_rate": 1.467619239094583e-05, "loss": 1.4341, "step": 23193 }, { "epoch": 0.8306265332067971, "grad_norm": 2.161923885345459, "learning_rate": 1.4670143829066296e-05, "loss": 1.4834, "step": 23194 }, { "epoch": 0.8306623453363654, "grad_norm": 1.8729352951049805, "learning_rate": 1.4664096415189899e-05, "loss": 1.5906, "step": 23195 }, { "epoch": 0.8306981574659337, "grad_norm": 1.8302282094955444, "learning_rate": 1.465805014939804e-05, "loss": 1.3676, "step": 23196 }, { "epoch": 0.8307339695955019, "grad_norm": 1.3889516592025757, "learning_rate": 1.465200503177201e-05, "loss": 1.4782, "step": 23197 }, { "epoch": 0.8307697817250703, "grad_norm": 2.007490396499634, "learning_rate": 1.4645961062393177e-05, "loss": 1.4347, "step": 23198 }, { "epoch": 0.8308055938546386, "grad_norm": 1.6827696561813354, "learning_rate": 1.4639918241342798e-05, "loss": 1.4996, "step": 23199 }, { "epoch": 0.8308414059842069, "grad_norm": 1.323423981666565, "learning_rate": 1.4633876568702254e-05, "loss": 1.1966, "step": 23200 }, { "epoch": 0.8308772181137751, "grad_norm": 1.5560758113861084, "learning_rate": 1.4627836044552767e-05, "loss": 1.4059, "step": 23201 }, { "epoch": 0.8309130302433434, "grad_norm": 2.4446370601654053, "learning_rate": 1.462179666897563e-05, "loss": 1.4578, "step": 23202 }, { "epoch": 0.8309488423729117, "grad_norm": 1.423201322555542, "learning_rate": 1.4615758442052085e-05, "loss": 1.1859, "step": 23203 }, { "epoch": 0.8309846545024799, "grad_norm": 1.891046166419983, "learning_rate": 1.4609721363863393e-05, "loss": 1.3277, "step": 23204 }, { "epoch": 0.8310204666320483, "grad_norm": 2.2019035816192627, "learning_rate": 1.4603685434490756e-05, "loss": 1.154, "step": 23205 }, { "epoch": 0.8310562787616166, "grad_norm": 1.9172054529190063, "learning_rate": 1.4597650654015327e-05, "loss": 1.2394, "step": 23206 }, { "epoch": 0.8310920908911849, "grad_norm": 1.5021252632141113, "learning_rate": 1.45916170225184e-05, "loss": 1.5499, "step": 23207 }, { "epoch": 0.8311279030207531, "grad_norm": 2.5014986991882324, "learning_rate": 1.4585584540081066e-05, "loss": 1.4685, "step": 23208 }, { "epoch": 0.8311637151503214, "grad_norm": 1.3708525896072388, "learning_rate": 1.4579553206784546e-05, "loss": 1.3963, "step": 23209 }, { "epoch": 0.8311995272798897, "grad_norm": 1.7175711393356323, "learning_rate": 1.45735230227099e-05, "loss": 1.5637, "step": 23210 }, { "epoch": 0.8312353394094579, "grad_norm": 1.8059896230697632, "learning_rate": 1.4567493987938364e-05, "loss": 1.3623, "step": 23211 }, { "epoch": 0.8312711515390263, "grad_norm": 1.5208191871643066, "learning_rate": 1.456146610255097e-05, "loss": 1.4858, "step": 23212 }, { "epoch": 0.8313069636685946, "grad_norm": 2.4135520458221436, "learning_rate": 1.4555439366628843e-05, "loss": 1.4645, "step": 23213 }, { "epoch": 0.8313427757981628, "grad_norm": 1.7877299785614014, "learning_rate": 1.4549413780253085e-05, "loss": 1.4712, "step": 23214 }, { "epoch": 0.8313785879277311, "grad_norm": 1.7445772886276245, "learning_rate": 1.454338934350472e-05, "loss": 1.6327, "step": 23215 }, { "epoch": 0.8314144000572994, "grad_norm": 1.4530085325241089, "learning_rate": 1.453736605646484e-05, "loss": 1.4826, "step": 23216 }, { "epoch": 0.8314502121868677, "grad_norm": 1.793205738067627, "learning_rate": 1.4531343919214414e-05, "loss": 1.3664, "step": 23217 }, { "epoch": 0.8314860243164359, "grad_norm": 1.2321491241455078, "learning_rate": 1.4525322931834562e-05, "loss": 1.1971, "step": 23218 }, { "epoch": 0.8315218364460043, "grad_norm": 1.8643945455551147, "learning_rate": 1.4519303094406211e-05, "loss": 1.5384, "step": 23219 }, { "epoch": 0.8315576485755726, "grad_norm": 1.8356062173843384, "learning_rate": 1.4513284407010385e-05, "loss": 1.5823, "step": 23220 }, { "epoch": 0.8315934607051408, "grad_norm": 1.8508111238479614, "learning_rate": 1.450726686972802e-05, "loss": 1.5022, "step": 23221 }, { "epoch": 0.8316292728347091, "grad_norm": 1.2595747709274292, "learning_rate": 1.4501250482640139e-05, "loss": 1.4594, "step": 23222 }, { "epoch": 0.8316650849642774, "grad_norm": 2.1554951667785645, "learning_rate": 1.4495235245827642e-05, "loss": 1.2048, "step": 23223 }, { "epoch": 0.8317008970938456, "grad_norm": 4.19765567779541, "learning_rate": 1.4489221159371447e-05, "loss": 1.5112, "step": 23224 }, { "epoch": 0.8317367092234139, "grad_norm": 1.5159118175506592, "learning_rate": 1.4483208223352474e-05, "loss": 1.6076, "step": 23225 }, { "epoch": 0.8317725213529823, "grad_norm": 1.4949398040771484, "learning_rate": 1.4477196437851625e-05, "loss": 1.5265, "step": 23226 }, { "epoch": 0.8318083334825506, "grad_norm": 1.6657307147979736, "learning_rate": 1.4471185802949816e-05, "loss": 1.501, "step": 23227 }, { "epoch": 0.8318441456121188, "grad_norm": 1.5736427307128906, "learning_rate": 1.4465176318727825e-05, "loss": 1.6235, "step": 23228 }, { "epoch": 0.8318799577416871, "grad_norm": 1.6373560428619385, "learning_rate": 1.4459167985266597e-05, "loss": 1.6095, "step": 23229 }, { "epoch": 0.8319157698712554, "grad_norm": 1.5352661609649658, "learning_rate": 1.4453160802646903e-05, "loss": 1.4128, "step": 23230 }, { "epoch": 0.8319515820008236, "grad_norm": 1.6256799697875977, "learning_rate": 1.444715477094961e-05, "loss": 1.4257, "step": 23231 }, { "epoch": 0.8319873941303919, "grad_norm": 1.462746262550354, "learning_rate": 1.4441149890255467e-05, "loss": 1.2893, "step": 23232 }, { "epoch": 0.8320232062599603, "grad_norm": 1.7766444683074951, "learning_rate": 1.4435146160645285e-05, "loss": 1.4783, "step": 23233 }, { "epoch": 0.8320590183895286, "grad_norm": 1.7726175785064697, "learning_rate": 1.4429143582199866e-05, "loss": 1.4622, "step": 23234 }, { "epoch": 0.8320948305190968, "grad_norm": 1.3048326969146729, "learning_rate": 1.4423142154999925e-05, "loss": 1.1044, "step": 23235 }, { "epoch": 0.8321306426486651, "grad_norm": 1.4011657238006592, "learning_rate": 1.4417141879126218e-05, "loss": 1.5252, "step": 23236 }, { "epoch": 0.8321664547782334, "grad_norm": 1.5536365509033203, "learning_rate": 1.4411142754659468e-05, "loss": 1.2663, "step": 23237 }, { "epoch": 0.8322022669078016, "grad_norm": 1.8643995523452759, "learning_rate": 1.4405144781680424e-05, "loss": 1.2179, "step": 23238 }, { "epoch": 0.8322380790373699, "grad_norm": 1.3420333862304688, "learning_rate": 1.4399147960269688e-05, "loss": 1.5695, "step": 23239 }, { "epoch": 0.8322738911669383, "grad_norm": 1.519026279449463, "learning_rate": 1.439315229050805e-05, "loss": 1.4541, "step": 23240 }, { "epoch": 0.8323097032965066, "grad_norm": 1.8381019830703735, "learning_rate": 1.4387157772476134e-05, "loss": 1.4799, "step": 23241 }, { "epoch": 0.8323455154260748, "grad_norm": 1.4542158842086792, "learning_rate": 1.4381164406254544e-05, "loss": 1.4863, "step": 23242 }, { "epoch": 0.8323813275556431, "grad_norm": 1.423120141029358, "learning_rate": 1.4375172191923947e-05, "loss": 1.3069, "step": 23243 }, { "epoch": 0.8324171396852114, "grad_norm": 1.5357532501220703, "learning_rate": 1.4369181129564957e-05, "loss": 1.473, "step": 23244 }, { "epoch": 0.8324529518147796, "grad_norm": 1.6310615539550781, "learning_rate": 1.4363191219258209e-05, "loss": 1.4536, "step": 23245 }, { "epoch": 0.8324887639443479, "grad_norm": 1.673390507698059, "learning_rate": 1.4357202461084229e-05, "loss": 1.3385, "step": 23246 }, { "epoch": 0.8325245760739163, "grad_norm": 2.130810022354126, "learning_rate": 1.4351214855123629e-05, "loss": 1.3291, "step": 23247 }, { "epoch": 0.8325603882034845, "grad_norm": 1.4513099193572998, "learning_rate": 1.4345228401456945e-05, "loss": 1.2643, "step": 23248 }, { "epoch": 0.8325962003330528, "grad_norm": 1.4877856969833374, "learning_rate": 1.4339243100164757e-05, "loss": 1.6895, "step": 23249 }, { "epoch": 0.8326320124626211, "grad_norm": 1.3141956329345703, "learning_rate": 1.4333258951327534e-05, "loss": 1.3533, "step": 23250 }, { "epoch": 0.8326678245921894, "grad_norm": 1.7010196447372437, "learning_rate": 1.4327275955025798e-05, "loss": 1.2068, "step": 23251 }, { "epoch": 0.8327036367217576, "grad_norm": 1.50443434715271, "learning_rate": 1.4321294111340089e-05, "loss": 1.283, "step": 23252 }, { "epoch": 0.8327394488513259, "grad_norm": 1.7006819248199463, "learning_rate": 1.4315313420350829e-05, "loss": 1.3811, "step": 23253 }, { "epoch": 0.8327752609808943, "grad_norm": 1.890299916267395, "learning_rate": 1.4309333882138488e-05, "loss": 1.3671, "step": 23254 }, { "epoch": 0.8328110731104625, "grad_norm": 1.7135711908340454, "learning_rate": 1.4303355496783544e-05, "loss": 1.3083, "step": 23255 }, { "epoch": 0.8328468852400308, "grad_norm": 1.436191201210022, "learning_rate": 1.4297378264366423e-05, "loss": 1.5384, "step": 23256 }, { "epoch": 0.8328826973695991, "grad_norm": 1.4963761568069458, "learning_rate": 1.4291402184967507e-05, "loss": 1.4136, "step": 23257 }, { "epoch": 0.8329185094991673, "grad_norm": 2.164684295654297, "learning_rate": 1.4285427258667217e-05, "loss": 1.5224, "step": 23258 }, { "epoch": 0.8329543216287356, "grad_norm": 1.8645986318588257, "learning_rate": 1.4279453485545968e-05, "loss": 1.9139, "step": 23259 }, { "epoch": 0.8329901337583039, "grad_norm": 1.6702115535736084, "learning_rate": 1.4273480865684074e-05, "loss": 1.3873, "step": 23260 }, { "epoch": 0.8330259458878723, "grad_norm": 1.5879836082458496, "learning_rate": 1.4267509399161916e-05, "loss": 1.2909, "step": 23261 }, { "epoch": 0.8330617580174405, "grad_norm": 1.5265380144119263, "learning_rate": 1.4261539086059839e-05, "loss": 1.5102, "step": 23262 }, { "epoch": 0.8330975701470088, "grad_norm": 1.9467709064483643, "learning_rate": 1.4255569926458168e-05, "loss": 1.4075, "step": 23263 }, { "epoch": 0.8331333822765771, "grad_norm": 1.7792093753814697, "learning_rate": 1.4249601920437194e-05, "loss": 1.3639, "step": 23264 }, { "epoch": 0.8331691944061453, "grad_norm": 1.6893055438995361, "learning_rate": 1.42436350680772e-05, "loss": 1.7997, "step": 23265 }, { "epoch": 0.8332050065357136, "grad_norm": 1.7191356420516968, "learning_rate": 1.4237669369458495e-05, "loss": 1.5456, "step": 23266 }, { "epoch": 0.8332408186652819, "grad_norm": 2.7596893310546875, "learning_rate": 1.4231704824661329e-05, "loss": 1.5299, "step": 23267 }, { "epoch": 0.8332766307948503, "grad_norm": 1.2917113304138184, "learning_rate": 1.4225741433765927e-05, "loss": 1.2855, "step": 23268 }, { "epoch": 0.8333124429244185, "grad_norm": 1.6655503511428833, "learning_rate": 1.4219779196852534e-05, "loss": 1.5177, "step": 23269 }, { "epoch": 0.8333482550539868, "grad_norm": 1.3067896366119385, "learning_rate": 1.4213818114001387e-05, "loss": 1.131, "step": 23270 }, { "epoch": 0.8333840671835551, "grad_norm": 1.9188344478607178, "learning_rate": 1.4207858185292643e-05, "loss": 1.4093, "step": 23271 }, { "epoch": 0.8334198793131233, "grad_norm": 1.8857660293579102, "learning_rate": 1.4201899410806496e-05, "loss": 1.612, "step": 23272 }, { "epoch": 0.8334556914426916, "grad_norm": 1.7722171545028687, "learning_rate": 1.4195941790623124e-05, "loss": 1.6784, "step": 23273 }, { "epoch": 0.8334915035722599, "grad_norm": 1.8155393600463867, "learning_rate": 1.4189985324822697e-05, "loss": 1.6842, "step": 23274 }, { "epoch": 0.8335273157018283, "grad_norm": 1.6256564855575562, "learning_rate": 1.4184030013485305e-05, "loss": 1.5721, "step": 23275 }, { "epoch": 0.8335631278313965, "grad_norm": 1.669029712677002, "learning_rate": 1.4178075856691097e-05, "loss": 1.4226, "step": 23276 }, { "epoch": 0.8335989399609648, "grad_norm": 1.8676083087921143, "learning_rate": 1.4172122854520198e-05, "loss": 1.7837, "step": 23277 }, { "epoch": 0.8336347520905331, "grad_norm": 1.5746208429336548, "learning_rate": 1.4166171007052653e-05, "loss": 1.2264, "step": 23278 }, { "epoch": 0.8336705642201013, "grad_norm": 2.2983994483947754, "learning_rate": 1.4160220314368555e-05, "loss": 1.6691, "step": 23279 }, { "epoch": 0.8337063763496696, "grad_norm": 1.4600412845611572, "learning_rate": 1.4154270776547974e-05, "loss": 1.1893, "step": 23280 }, { "epoch": 0.8337421884792379, "grad_norm": 3.8261477947235107, "learning_rate": 1.4148322393670976e-05, "loss": 1.55, "step": 23281 }, { "epoch": 0.8337780006088062, "grad_norm": 1.9612890481948853, "learning_rate": 1.4142375165817523e-05, "loss": 1.4173, "step": 23282 }, { "epoch": 0.8338138127383745, "grad_norm": 2.1106479167938232, "learning_rate": 1.413642909306767e-05, "loss": 1.5264, "step": 23283 }, { "epoch": 0.8338496248679428, "grad_norm": 1.3178765773773193, "learning_rate": 1.4130484175501435e-05, "loss": 1.2424, "step": 23284 }, { "epoch": 0.833885436997511, "grad_norm": 2.719381332397461, "learning_rate": 1.412454041319874e-05, "loss": 1.6399, "step": 23285 }, { "epoch": 0.8339212491270793, "grad_norm": 2.3354897499084473, "learning_rate": 1.4118597806239585e-05, "loss": 1.4628, "step": 23286 }, { "epoch": 0.8339570612566476, "grad_norm": 1.822835922241211, "learning_rate": 1.4112656354703924e-05, "loss": 1.2929, "step": 23287 }, { "epoch": 0.8339928733862159, "grad_norm": 1.2769025564193726, "learning_rate": 1.41067160586717e-05, "loss": 1.4113, "step": 23288 }, { "epoch": 0.8340286855157842, "grad_norm": 1.8172305822372437, "learning_rate": 1.4100776918222802e-05, "loss": 1.2328, "step": 23289 }, { "epoch": 0.8340644976453525, "grad_norm": 1.472387433052063, "learning_rate": 1.4094838933437138e-05, "loss": 1.4583, "step": 23290 }, { "epoch": 0.8341003097749208, "grad_norm": 1.9044989347457886, "learning_rate": 1.4088902104394607e-05, "loss": 1.7, "step": 23291 }, { "epoch": 0.834136121904489, "grad_norm": 2.0929458141326904, "learning_rate": 1.4082966431175116e-05, "loss": 1.1544, "step": 23292 }, { "epoch": 0.8341719340340573, "grad_norm": 1.484946846961975, "learning_rate": 1.4077031913858474e-05, "loss": 1.5744, "step": 23293 }, { "epoch": 0.8342077461636256, "grad_norm": 1.6925033330917358, "learning_rate": 1.4071098552524497e-05, "loss": 1.4153, "step": 23294 }, { "epoch": 0.8342435582931939, "grad_norm": 1.6346595287322998, "learning_rate": 1.4065166347253089e-05, "loss": 1.2867, "step": 23295 }, { "epoch": 0.8342793704227622, "grad_norm": 1.8432413339614868, "learning_rate": 1.4059235298124006e-05, "loss": 1.3611, "step": 23296 }, { "epoch": 0.8343151825523305, "grad_norm": 1.546345591545105, "learning_rate": 1.4053305405217044e-05, "loss": 1.531, "step": 23297 }, { "epoch": 0.8343509946818988, "grad_norm": 1.4019263982772827, "learning_rate": 1.4047376668612e-05, "loss": 1.3993, "step": 23298 }, { "epoch": 0.834386806811467, "grad_norm": 1.7556198835372925, "learning_rate": 1.4041449088388659e-05, "loss": 1.0783, "step": 23299 }, { "epoch": 0.8344226189410353, "grad_norm": 1.5568428039550781, "learning_rate": 1.4035522664626721e-05, "loss": 1.4352, "step": 23300 }, { "epoch": 0.8344584310706036, "grad_norm": 3.3579607009887695, "learning_rate": 1.4029597397405925e-05, "loss": 1.6769, "step": 23301 }, { "epoch": 0.8344942432001718, "grad_norm": 1.5644280910491943, "learning_rate": 1.4023673286806039e-05, "loss": 1.089, "step": 23302 }, { "epoch": 0.8345300553297402, "grad_norm": 1.8802729845046997, "learning_rate": 1.4017750332906698e-05, "loss": 1.7205, "step": 23303 }, { "epoch": 0.8345658674593085, "grad_norm": 1.711993932723999, "learning_rate": 1.4011828535787642e-05, "loss": 1.5439, "step": 23304 }, { "epoch": 0.8346016795888768, "grad_norm": 1.9384276866912842, "learning_rate": 1.400590789552847e-05, "loss": 1.3889, "step": 23305 }, { "epoch": 0.834637491718445, "grad_norm": 2.598912000656128, "learning_rate": 1.3999988412208931e-05, "loss": 1.5202, "step": 23306 }, { "epoch": 0.8346733038480133, "grad_norm": 1.5115162134170532, "learning_rate": 1.3994070085908596e-05, "loss": 1.5137, "step": 23307 }, { "epoch": 0.8347091159775816, "grad_norm": 1.8529272079467773, "learning_rate": 1.3988152916707121e-05, "loss": 1.2628, "step": 23308 }, { "epoch": 0.8347449281071498, "grad_norm": 1.7130944728851318, "learning_rate": 1.3982236904684064e-05, "loss": 1.2679, "step": 23309 }, { "epoch": 0.8347807402367182, "grad_norm": 1.4939451217651367, "learning_rate": 1.3976322049919088e-05, "loss": 1.5153, "step": 23310 }, { "epoch": 0.8348165523662865, "grad_norm": 1.5681949853897095, "learning_rate": 1.3970408352491749e-05, "loss": 1.4088, "step": 23311 }, { "epoch": 0.8348523644958548, "grad_norm": 1.7648473978042603, "learning_rate": 1.3964495812481548e-05, "loss": 1.3955, "step": 23312 }, { "epoch": 0.834888176625423, "grad_norm": 1.4568158388137817, "learning_rate": 1.3958584429968124e-05, "loss": 1.4164, "step": 23313 }, { "epoch": 0.8349239887549913, "grad_norm": 1.6647497415542603, "learning_rate": 1.3952674205030935e-05, "loss": 1.1866, "step": 23314 }, { "epoch": 0.8349598008845596, "grad_norm": 1.7173484563827515, "learning_rate": 1.394676513774954e-05, "loss": 1.5996, "step": 23315 }, { "epoch": 0.8349956130141278, "grad_norm": 1.6371338367462158, "learning_rate": 1.3940857228203386e-05, "loss": 1.4188, "step": 23316 }, { "epoch": 0.8350314251436962, "grad_norm": 1.3010733127593994, "learning_rate": 1.393495047647202e-05, "loss": 1.516, "step": 23317 }, { "epoch": 0.8350672372732645, "grad_norm": 1.5296361446380615, "learning_rate": 1.3929044882634867e-05, "loss": 1.4748, "step": 23318 }, { "epoch": 0.8351030494028328, "grad_norm": 1.7259113788604736, "learning_rate": 1.3923140446771409e-05, "loss": 1.8208, "step": 23319 }, { "epoch": 0.835138861532401, "grad_norm": 1.6335358619689941, "learning_rate": 1.3917237168961051e-05, "loss": 1.3154, "step": 23320 }, { "epoch": 0.8351746736619693, "grad_norm": 2.0975825786590576, "learning_rate": 1.3911335049283225e-05, "loss": 1.6693, "step": 23321 }, { "epoch": 0.8352104857915376, "grad_norm": 1.558631181716919, "learning_rate": 1.390543408781736e-05, "loss": 1.5984, "step": 23322 }, { "epoch": 0.8352462979211058, "grad_norm": 1.3782986402511597, "learning_rate": 1.3899534284642779e-05, "loss": 1.1026, "step": 23323 }, { "epoch": 0.8352821100506742, "grad_norm": 1.7837612628936768, "learning_rate": 1.3893635639838942e-05, "loss": 1.2879, "step": 23324 }, { "epoch": 0.8353179221802425, "grad_norm": 1.4235824346542358, "learning_rate": 1.3887738153485153e-05, "loss": 1.5469, "step": 23325 }, { "epoch": 0.8353537343098107, "grad_norm": 1.4114874601364136, "learning_rate": 1.388184182566079e-05, "loss": 1.523, "step": 23326 }, { "epoch": 0.835389546439379, "grad_norm": 1.7939683198928833, "learning_rate": 1.3875946656445126e-05, "loss": 1.5065, "step": 23327 }, { "epoch": 0.8354253585689473, "grad_norm": 1.2166051864624023, "learning_rate": 1.3870052645917542e-05, "loss": 1.257, "step": 23328 }, { "epoch": 0.8354611706985156, "grad_norm": 2.7868435382843018, "learning_rate": 1.3864159794157305e-05, "loss": 0.9794, "step": 23329 }, { "epoch": 0.8354969828280838, "grad_norm": 2.0114903450012207, "learning_rate": 1.3858268101243666e-05, "loss": 1.338, "step": 23330 }, { "epoch": 0.8355327949576522, "grad_norm": 1.9516043663024902, "learning_rate": 1.3852377567255913e-05, "loss": 1.3652, "step": 23331 }, { "epoch": 0.8355686070872205, "grad_norm": 1.8483436107635498, "learning_rate": 1.3846488192273298e-05, "loss": 1.8179, "step": 23332 }, { "epoch": 0.8356044192167887, "grad_norm": 1.333381175994873, "learning_rate": 1.384059997637508e-05, "loss": 1.4457, "step": 23333 }, { "epoch": 0.835640231346357, "grad_norm": 1.6573841571807861, "learning_rate": 1.3834712919640424e-05, "loss": 1.1238, "step": 23334 }, { "epoch": 0.8356760434759253, "grad_norm": 1.6716316938400269, "learning_rate": 1.382882702214856e-05, "loss": 1.6315, "step": 23335 }, { "epoch": 0.8357118556054935, "grad_norm": 2.7846570014953613, "learning_rate": 1.382294228397868e-05, "loss": 1.7287, "step": 23336 }, { "epoch": 0.8357476677350618, "grad_norm": 1.2655292749404907, "learning_rate": 1.3817058705209973e-05, "loss": 1.5531, "step": 23337 }, { "epoch": 0.8357834798646302, "grad_norm": 1.5303046703338623, "learning_rate": 1.3811176285921557e-05, "loss": 1.3362, "step": 23338 }, { "epoch": 0.8358192919941985, "grad_norm": 2.3918161392211914, "learning_rate": 1.3805295026192577e-05, "loss": 1.4692, "step": 23339 }, { "epoch": 0.8358551041237667, "grad_norm": 1.6694695949554443, "learning_rate": 1.3799414926102194e-05, "loss": 1.3039, "step": 23340 }, { "epoch": 0.835890916253335, "grad_norm": 1.2947691679000854, "learning_rate": 1.3793535985729478e-05, "loss": 1.5269, "step": 23341 }, { "epoch": 0.8359267283829033, "grad_norm": 1.4575728178024292, "learning_rate": 1.3787658205153532e-05, "loss": 1.3698, "step": 23342 }, { "epoch": 0.8359625405124715, "grad_norm": 1.376752257347107, "learning_rate": 1.3781781584453435e-05, "loss": 1.4291, "step": 23343 }, { "epoch": 0.8359983526420398, "grad_norm": 1.3669610023498535, "learning_rate": 1.3775906123708282e-05, "loss": 1.2259, "step": 23344 }, { "epoch": 0.8360341647716082, "grad_norm": 1.7606292963027954, "learning_rate": 1.3770031822997064e-05, "loss": 1.4199, "step": 23345 }, { "epoch": 0.8360699769011765, "grad_norm": 1.6351981163024902, "learning_rate": 1.3764158682398843e-05, "loss": 1.3858, "step": 23346 }, { "epoch": 0.8361057890307447, "grad_norm": 1.4310319423675537, "learning_rate": 1.3758286701992651e-05, "loss": 1.4934, "step": 23347 }, { "epoch": 0.836141601160313, "grad_norm": 1.3345412015914917, "learning_rate": 1.375241588185744e-05, "loss": 1.3918, "step": 23348 }, { "epoch": 0.8361774132898813, "grad_norm": 1.554101586341858, "learning_rate": 1.3746546222072232e-05, "loss": 1.2231, "step": 23349 }, { "epoch": 0.8362132254194495, "grad_norm": 1.5663398504257202, "learning_rate": 1.3740677722715977e-05, "loss": 1.5471, "step": 23350 }, { "epoch": 0.8362490375490178, "grad_norm": 1.472374439239502, "learning_rate": 1.373481038386767e-05, "loss": 1.4637, "step": 23351 }, { "epoch": 0.8362848496785862, "grad_norm": 1.973220705986023, "learning_rate": 1.3728944205606186e-05, "loss": 1.3186, "step": 23352 }, { "epoch": 0.8363206618081545, "grad_norm": 1.8892866373062134, "learning_rate": 1.3723079188010469e-05, "loss": 1.4246, "step": 23353 }, { "epoch": 0.8363564739377227, "grad_norm": 1.4619778394699097, "learning_rate": 1.3717215331159439e-05, "loss": 1.3644, "step": 23354 }, { "epoch": 0.836392286067291, "grad_norm": 1.3195886611938477, "learning_rate": 1.3711352635132002e-05, "loss": 1.1605, "step": 23355 }, { "epoch": 0.8364280981968593, "grad_norm": 1.655963659286499, "learning_rate": 1.3705491100006995e-05, "loss": 1.3923, "step": 23356 }, { "epoch": 0.8364639103264275, "grad_norm": 1.574783444404602, "learning_rate": 1.3699630725863289e-05, "loss": 1.3442, "step": 23357 }, { "epoch": 0.8364997224559958, "grad_norm": 1.387752652168274, "learning_rate": 1.3693771512779752e-05, "loss": 1.4718, "step": 23358 }, { "epoch": 0.8365355345855642, "grad_norm": 1.196673035621643, "learning_rate": 1.3687913460835167e-05, "loss": 1.6578, "step": 23359 }, { "epoch": 0.8365713467151324, "grad_norm": 1.6154292821884155, "learning_rate": 1.3682056570108382e-05, "loss": 1.5265, "step": 23360 }, { "epoch": 0.8366071588447007, "grad_norm": 1.4177491664886475, "learning_rate": 1.3676200840678167e-05, "loss": 1.1989, "step": 23361 }, { "epoch": 0.836642970974269, "grad_norm": 1.6271674633026123, "learning_rate": 1.3670346272623357e-05, "loss": 1.2113, "step": 23362 }, { "epoch": 0.8366787831038373, "grad_norm": 1.4818865060806274, "learning_rate": 1.366449286602265e-05, "loss": 1.4199, "step": 23363 }, { "epoch": 0.8367145952334055, "grad_norm": 1.7398416996002197, "learning_rate": 1.3658640620954832e-05, "loss": 1.4518, "step": 23364 }, { "epoch": 0.8367504073629738, "grad_norm": 1.4655635356903076, "learning_rate": 1.3652789537498656e-05, "loss": 1.5296, "step": 23365 }, { "epoch": 0.8367862194925422, "grad_norm": 1.5625853538513184, "learning_rate": 1.364693961573279e-05, "loss": 1.3327, "step": 23366 }, { "epoch": 0.8368220316221104, "grad_norm": 1.5531121492385864, "learning_rate": 1.3641090855735972e-05, "loss": 1.0688, "step": 23367 }, { "epoch": 0.8368578437516787, "grad_norm": 1.4807175397872925, "learning_rate": 1.3635243257586872e-05, "loss": 1.5016, "step": 23368 }, { "epoch": 0.836893655881247, "grad_norm": 1.3785326480865479, "learning_rate": 1.3629396821364193e-05, "loss": 1.4082, "step": 23369 }, { "epoch": 0.8369294680108152, "grad_norm": 2.476111888885498, "learning_rate": 1.3623551547146552e-05, "loss": 1.5001, "step": 23370 }, { "epoch": 0.8369652801403835, "grad_norm": 1.6185861825942993, "learning_rate": 1.3617707435012606e-05, "loss": 1.3256, "step": 23371 }, { "epoch": 0.8370010922699518, "grad_norm": 1.973078727722168, "learning_rate": 1.3611864485040982e-05, "loss": 1.6241, "step": 23372 }, { "epoch": 0.8370369043995202, "grad_norm": 1.578711748123169, "learning_rate": 1.3606022697310316e-05, "loss": 1.5142, "step": 23373 }, { "epoch": 0.8370727165290884, "grad_norm": 1.5022464990615845, "learning_rate": 1.3600182071899148e-05, "loss": 1.6072, "step": 23374 }, { "epoch": 0.8371085286586567, "grad_norm": 1.498950481414795, "learning_rate": 1.359434260888608e-05, "loss": 1.4262, "step": 23375 }, { "epoch": 0.837144340788225, "grad_norm": 1.854566216468811, "learning_rate": 1.3588504308349703e-05, "loss": 1.3126, "step": 23376 }, { "epoch": 0.8371801529177932, "grad_norm": 1.531659483909607, "learning_rate": 1.3582667170368513e-05, "loss": 1.4459, "step": 23377 }, { "epoch": 0.8372159650473615, "grad_norm": 1.3971465826034546, "learning_rate": 1.3576831195021067e-05, "loss": 1.5605, "step": 23378 }, { "epoch": 0.8372517771769298, "grad_norm": 1.5634061098098755, "learning_rate": 1.357099638238587e-05, "loss": 1.3963, "step": 23379 }, { "epoch": 0.8372875893064982, "grad_norm": 1.8827449083328247, "learning_rate": 1.356516273254147e-05, "loss": 1.2539, "step": 23380 }, { "epoch": 0.8373234014360664, "grad_norm": 2.146848678588867, "learning_rate": 1.3559330245566282e-05, "loss": 1.4573, "step": 23381 }, { "epoch": 0.8373592135656347, "grad_norm": 1.4957455396652222, "learning_rate": 1.3553498921538798e-05, "loss": 1.4717, "step": 23382 }, { "epoch": 0.837395025695203, "grad_norm": 1.9274322986602783, "learning_rate": 1.3547668760537514e-05, "loss": 1.6203, "step": 23383 }, { "epoch": 0.8374308378247712, "grad_norm": 2.2220675945281982, "learning_rate": 1.3541839762640796e-05, "loss": 1.3541, "step": 23384 }, { "epoch": 0.8374666499543395, "grad_norm": 1.7440205812454224, "learning_rate": 1.3536011927927117e-05, "loss": 1.4781, "step": 23385 }, { "epoch": 0.8375024620839078, "grad_norm": 1.5415992736816406, "learning_rate": 1.3530185256474848e-05, "loss": 1.4387, "step": 23386 }, { "epoch": 0.8375382742134762, "grad_norm": 1.6011251211166382, "learning_rate": 1.3524359748362437e-05, "loss": 1.4087, "step": 23387 }, { "epoch": 0.8375740863430444, "grad_norm": 2.023725986480713, "learning_rate": 1.3518535403668186e-05, "loss": 1.3027, "step": 23388 }, { "epoch": 0.8376098984726127, "grad_norm": 1.5329786539077759, "learning_rate": 1.3512712222470491e-05, "loss": 1.1108, "step": 23389 }, { "epoch": 0.837645710602181, "grad_norm": 1.8306118249893188, "learning_rate": 1.3506890204847722e-05, "loss": 1.6126, "step": 23390 }, { "epoch": 0.8376815227317492, "grad_norm": 1.6327990293502808, "learning_rate": 1.3501069350878149e-05, "loss": 1.2224, "step": 23391 }, { "epoch": 0.8377173348613175, "grad_norm": 1.5039691925048828, "learning_rate": 1.3495249660640142e-05, "loss": 1.1612, "step": 23392 }, { "epoch": 0.8377531469908858, "grad_norm": 1.8655650615692139, "learning_rate": 1.3489431134211916e-05, "loss": 1.4411, "step": 23393 }, { "epoch": 0.8377889591204541, "grad_norm": 1.7704803943634033, "learning_rate": 1.3483613771671843e-05, "loss": 1.5848, "step": 23394 }, { "epoch": 0.8378247712500224, "grad_norm": 1.495496153831482, "learning_rate": 1.3477797573098128e-05, "loss": 1.3929, "step": 23395 }, { "epoch": 0.8378605833795907, "grad_norm": 1.5385957956314087, "learning_rate": 1.347198253856905e-05, "loss": 1.2143, "step": 23396 }, { "epoch": 0.837896395509159, "grad_norm": 2.3165247440338135, "learning_rate": 1.3466168668162827e-05, "loss": 1.2349, "step": 23397 }, { "epoch": 0.8379322076387272, "grad_norm": 1.8588618040084839, "learning_rate": 1.3460355961957704e-05, "loss": 1.4881, "step": 23398 }, { "epoch": 0.8379680197682955, "grad_norm": 1.4896241426467896, "learning_rate": 1.3454544420031878e-05, "loss": 1.6074, "step": 23399 }, { "epoch": 0.8380038318978638, "grad_norm": 1.9073976278305054, "learning_rate": 1.3448734042463463e-05, "loss": 1.7668, "step": 23400 }, { "epoch": 0.8380396440274321, "grad_norm": 1.4687809944152832, "learning_rate": 1.3442924829330738e-05, "loss": 1.6483, "step": 23401 }, { "epoch": 0.8380754561570004, "grad_norm": 1.6176726818084717, "learning_rate": 1.3437116780711778e-05, "loss": 1.2699, "step": 23402 }, { "epoch": 0.8381112682865687, "grad_norm": 1.1954487562179565, "learning_rate": 1.3431309896684785e-05, "loss": 1.2813, "step": 23403 }, { "epoch": 0.838147080416137, "grad_norm": 1.294426441192627, "learning_rate": 1.3425504177327808e-05, "loss": 1.1881, "step": 23404 }, { "epoch": 0.8381828925457052, "grad_norm": 1.699479341506958, "learning_rate": 1.341969962271904e-05, "loss": 1.2483, "step": 23405 }, { "epoch": 0.8382187046752735, "grad_norm": 1.4392833709716797, "learning_rate": 1.3413896232936506e-05, "loss": 1.3719, "step": 23406 }, { "epoch": 0.8382545168048418, "grad_norm": 1.4115568399429321, "learning_rate": 1.3408094008058314e-05, "loss": 1.3079, "step": 23407 }, { "epoch": 0.8382903289344101, "grad_norm": 1.4831119775772095, "learning_rate": 1.3402292948162554e-05, "loss": 1.259, "step": 23408 }, { "epoch": 0.8383261410639784, "grad_norm": 1.8980132341384888, "learning_rate": 1.3396493053327208e-05, "loss": 1.2174, "step": 23409 }, { "epoch": 0.8383619531935467, "grad_norm": 1.7400474548339844, "learning_rate": 1.339069432363036e-05, "loss": 1.3569, "step": 23410 }, { "epoch": 0.8383977653231149, "grad_norm": 1.8057924509048462, "learning_rate": 1.3384896759149957e-05, "loss": 1.4134, "step": 23411 }, { "epoch": 0.8384335774526832, "grad_norm": 1.5717723369598389, "learning_rate": 1.3379100359964082e-05, "loss": 1.6989, "step": 23412 }, { "epoch": 0.8384693895822515, "grad_norm": 1.7798326015472412, "learning_rate": 1.337330512615066e-05, "loss": 1.3593, "step": 23413 }, { "epoch": 0.8385052017118197, "grad_norm": 1.4081906080245972, "learning_rate": 1.3367511057787707e-05, "loss": 1.275, "step": 23414 }, { "epoch": 0.8385410138413881, "grad_norm": 1.6106690168380737, "learning_rate": 1.3361718154953096e-05, "loss": 1.5284, "step": 23415 }, { "epoch": 0.8385768259709564, "grad_norm": 1.369853138923645, "learning_rate": 1.3355926417724852e-05, "loss": 1.051, "step": 23416 }, { "epoch": 0.8386126381005247, "grad_norm": 1.8405297994613647, "learning_rate": 1.3350135846180856e-05, "loss": 1.6593, "step": 23417 }, { "epoch": 0.8386484502300929, "grad_norm": 1.8503670692443848, "learning_rate": 1.3344346440398992e-05, "loss": 1.4476, "step": 23418 }, { "epoch": 0.8386842623596612, "grad_norm": 2.0508267879486084, "learning_rate": 1.3338558200457174e-05, "loss": 1.3094, "step": 23419 }, { "epoch": 0.8387200744892295, "grad_norm": 2.294262647628784, "learning_rate": 1.3332771126433263e-05, "loss": 1.4598, "step": 23420 }, { "epoch": 0.8387558866187977, "grad_norm": 1.7309292554855347, "learning_rate": 1.3326985218405152e-05, "loss": 1.1046, "step": 23421 }, { "epoch": 0.8387916987483661, "grad_norm": 1.3808698654174805, "learning_rate": 1.3321200476450602e-05, "loss": 1.5908, "step": 23422 }, { "epoch": 0.8388275108779344, "grad_norm": 1.4959542751312256, "learning_rate": 1.3315416900647548e-05, "loss": 1.298, "step": 23423 }, { "epoch": 0.8388633230075027, "grad_norm": 1.4639296531677246, "learning_rate": 1.3309634491073707e-05, "loss": 1.5601, "step": 23424 }, { "epoch": 0.8388991351370709, "grad_norm": 1.8734369277954102, "learning_rate": 1.330385324780694e-05, "loss": 1.5761, "step": 23425 }, { "epoch": 0.8389349472666392, "grad_norm": 1.7705508470535278, "learning_rate": 1.3298073170924986e-05, "loss": 1.6338, "step": 23426 }, { "epoch": 0.8389707593962075, "grad_norm": 1.5225541591644287, "learning_rate": 1.3292294260505611e-05, "loss": 1.3579, "step": 23427 }, { "epoch": 0.8390065715257757, "grad_norm": 1.517962098121643, "learning_rate": 1.328651651662659e-05, "loss": 1.0889, "step": 23428 }, { "epoch": 0.8390423836553441, "grad_norm": 1.481013536453247, "learning_rate": 1.3280739939365617e-05, "loss": 1.5388, "step": 23429 }, { "epoch": 0.8390781957849124, "grad_norm": 2.0160789489746094, "learning_rate": 1.3274964528800437e-05, "loss": 1.8554, "step": 23430 }, { "epoch": 0.8391140079144807, "grad_norm": 1.6885130405426025, "learning_rate": 1.3269190285008737e-05, "loss": 1.433, "step": 23431 }, { "epoch": 0.8391498200440489, "grad_norm": 1.642545461654663, "learning_rate": 1.3263417208068218e-05, "loss": 1.3426, "step": 23432 }, { "epoch": 0.8391856321736172, "grad_norm": 1.5598057508468628, "learning_rate": 1.325764529805651e-05, "loss": 1.3445, "step": 23433 }, { "epoch": 0.8392214443031855, "grad_norm": 1.5403119325637817, "learning_rate": 1.3251874555051336e-05, "loss": 1.5846, "step": 23434 }, { "epoch": 0.8392572564327537, "grad_norm": 1.8304781913757324, "learning_rate": 1.3246104979130281e-05, "loss": 1.3213, "step": 23435 }, { "epoch": 0.8392930685623221, "grad_norm": 1.7894020080566406, "learning_rate": 1.324033657037097e-05, "loss": 1.6969, "step": 23436 }, { "epoch": 0.8393288806918904, "grad_norm": 1.809398889541626, "learning_rate": 1.323456932885101e-05, "loss": 1.4837, "step": 23437 }, { "epoch": 0.8393646928214586, "grad_norm": 1.6280808448791504, "learning_rate": 1.3228803254648004e-05, "loss": 1.6715, "step": 23438 }, { "epoch": 0.8394005049510269, "grad_norm": 1.8543540239334106, "learning_rate": 1.3223038347839544e-05, "loss": 1.3243, "step": 23439 }, { "epoch": 0.8394363170805952, "grad_norm": 1.5658224821090698, "learning_rate": 1.321727460850315e-05, "loss": 1.4643, "step": 23440 }, { "epoch": 0.8394721292101635, "grad_norm": 1.7527110576629639, "learning_rate": 1.321151203671639e-05, "loss": 1.2166, "step": 23441 }, { "epoch": 0.8395079413397317, "grad_norm": 1.9235097169876099, "learning_rate": 1.320575063255678e-05, "loss": 1.3466, "step": 23442 }, { "epoch": 0.8395437534693001, "grad_norm": 1.8319237232208252, "learning_rate": 1.3199990396101858e-05, "loss": 1.3571, "step": 23443 }, { "epoch": 0.8395795655988684, "grad_norm": 2.8446924686431885, "learning_rate": 1.3194231327429085e-05, "loss": 1.7009, "step": 23444 }, { "epoch": 0.8396153777284366, "grad_norm": 2.0625321865081787, "learning_rate": 1.3188473426615956e-05, "loss": 1.5693, "step": 23445 }, { "epoch": 0.8396511898580049, "grad_norm": 1.4854190349578857, "learning_rate": 1.3182716693739949e-05, "loss": 1.1659, "step": 23446 }, { "epoch": 0.8396870019875732, "grad_norm": 1.87632155418396, "learning_rate": 1.3176961128878495e-05, "loss": 1.3416, "step": 23447 }, { "epoch": 0.8397228141171414, "grad_norm": 1.6211196184158325, "learning_rate": 1.3171206732109031e-05, "loss": 1.342, "step": 23448 }, { "epoch": 0.8397586262467097, "grad_norm": 1.543217658996582, "learning_rate": 1.3165453503508984e-05, "loss": 1.5375, "step": 23449 }, { "epoch": 0.8397944383762781, "grad_norm": 1.9819585084915161, "learning_rate": 1.3159701443155759e-05, "loss": 1.6551, "step": 23450 }, { "epoch": 0.8398302505058464, "grad_norm": 1.3575177192687988, "learning_rate": 1.3153950551126725e-05, "loss": 1.5489, "step": 23451 }, { "epoch": 0.8398660626354146, "grad_norm": 1.597785472869873, "learning_rate": 1.3148200827499269e-05, "loss": 1.4854, "step": 23452 }, { "epoch": 0.8399018747649829, "grad_norm": 1.6049631834030151, "learning_rate": 1.3142452272350747e-05, "loss": 1.3046, "step": 23453 }, { "epoch": 0.8399376868945512, "grad_norm": 1.8065446615219116, "learning_rate": 1.3136704885758477e-05, "loss": 1.4958, "step": 23454 }, { "epoch": 0.8399734990241194, "grad_norm": 1.5930157899856567, "learning_rate": 1.3130958667799798e-05, "loss": 1.6832, "step": 23455 }, { "epoch": 0.8400093111536877, "grad_norm": 1.551112413406372, "learning_rate": 1.3125213618552013e-05, "loss": 1.4293, "step": 23456 }, { "epoch": 0.8400451232832561, "grad_norm": 2.0661537647247314, "learning_rate": 1.3119469738092449e-05, "loss": 1.3053, "step": 23457 }, { "epoch": 0.8400809354128244, "grad_norm": 1.5538454055786133, "learning_rate": 1.3113727026498323e-05, "loss": 1.2876, "step": 23458 }, { "epoch": 0.8401167475423926, "grad_norm": 1.8126628398895264, "learning_rate": 1.310798548384693e-05, "loss": 1.5399, "step": 23459 }, { "epoch": 0.8401525596719609, "grad_norm": 1.6119327545166016, "learning_rate": 1.3102245110215495e-05, "loss": 1.3692, "step": 23460 }, { "epoch": 0.8401883718015292, "grad_norm": 1.673466444015503, "learning_rate": 1.30965059056813e-05, "loss": 1.1661, "step": 23461 }, { "epoch": 0.8402241839310974, "grad_norm": 3.0586817264556885, "learning_rate": 1.3090767870321496e-05, "loss": 1.6918, "step": 23462 }, { "epoch": 0.8402599960606657, "grad_norm": 1.7422840595245361, "learning_rate": 1.30850310042133e-05, "loss": 1.4367, "step": 23463 }, { "epoch": 0.8402958081902341, "grad_norm": 1.521484613418579, "learning_rate": 1.3079295307433925e-05, "loss": 1.5184, "step": 23464 }, { "epoch": 0.8403316203198024, "grad_norm": 1.3086313009262085, "learning_rate": 1.307356078006049e-05, "loss": 1.2359, "step": 23465 }, { "epoch": 0.8403674324493706, "grad_norm": 1.7559353113174438, "learning_rate": 1.3067827422170165e-05, "loss": 1.593, "step": 23466 }, { "epoch": 0.8404032445789389, "grad_norm": 1.462621808052063, "learning_rate": 1.3062095233840089e-05, "loss": 1.3325, "step": 23467 }, { "epoch": 0.8404390567085072, "grad_norm": 1.2271201610565186, "learning_rate": 1.30563642151474e-05, "loss": 1.4682, "step": 23468 }, { "epoch": 0.8404748688380754, "grad_norm": 2.206615924835205, "learning_rate": 1.3050634366169156e-05, "loss": 1.0847, "step": 23469 }, { "epoch": 0.8405106809676437, "grad_norm": 1.6466546058654785, "learning_rate": 1.3044905686982479e-05, "loss": 1.3145, "step": 23470 }, { "epoch": 0.8405464930972121, "grad_norm": 1.7529257535934448, "learning_rate": 1.3039178177664458e-05, "loss": 1.2314, "step": 23471 }, { "epoch": 0.8405823052267803, "grad_norm": 1.4902821779251099, "learning_rate": 1.3033451838292088e-05, "loss": 1.453, "step": 23472 }, { "epoch": 0.8406181173563486, "grad_norm": 1.2909750938415527, "learning_rate": 1.3027726668942452e-05, "loss": 1.4757, "step": 23473 }, { "epoch": 0.8406539294859169, "grad_norm": 1.7654305696487427, "learning_rate": 1.3022002669692568e-05, "loss": 1.2688, "step": 23474 }, { "epoch": 0.8406897416154852, "grad_norm": 1.5162429809570312, "learning_rate": 1.3016279840619461e-05, "loss": 1.7285, "step": 23475 }, { "epoch": 0.8407255537450534, "grad_norm": 1.8618532419204712, "learning_rate": 1.3010558181800091e-05, "loss": 1.5294, "step": 23476 }, { "epoch": 0.8407613658746217, "grad_norm": 1.9252269268035889, "learning_rate": 1.3004837693311445e-05, "loss": 1.8202, "step": 23477 }, { "epoch": 0.8407971780041901, "grad_norm": 1.5172358751296997, "learning_rate": 1.2999118375230523e-05, "loss": 1.452, "step": 23478 }, { "epoch": 0.8408329901337583, "grad_norm": 1.8392778635025024, "learning_rate": 1.2993400227634211e-05, "loss": 1.2184, "step": 23479 }, { "epoch": 0.8408688022633266, "grad_norm": 1.4657130241394043, "learning_rate": 1.2987683250599481e-05, "loss": 1.2283, "step": 23480 }, { "epoch": 0.8409046143928949, "grad_norm": 1.8285713195800781, "learning_rate": 1.2981967444203224e-05, "loss": 1.2617, "step": 23481 }, { "epoch": 0.8409404265224631, "grad_norm": 1.4383978843688965, "learning_rate": 1.297625280852237e-05, "loss": 1.2994, "step": 23482 }, { "epoch": 0.8409762386520314, "grad_norm": 1.733881950378418, "learning_rate": 1.297053934363377e-05, "loss": 1.5984, "step": 23483 }, { "epoch": 0.8410120507815997, "grad_norm": 1.4433633089065552, "learning_rate": 1.2964827049614291e-05, "loss": 1.3371, "step": 23484 }, { "epoch": 0.8410478629111681, "grad_norm": 1.5623021125793457, "learning_rate": 1.295911592654081e-05, "loss": 1.8593, "step": 23485 }, { "epoch": 0.8410836750407363, "grad_norm": 1.315675973892212, "learning_rate": 1.2953405974490163e-05, "loss": 1.326, "step": 23486 }, { "epoch": 0.8411194871703046, "grad_norm": 2.0233829021453857, "learning_rate": 1.2947697193539154e-05, "loss": 1.499, "step": 23487 }, { "epoch": 0.8411552992998729, "grad_norm": 1.2258970737457275, "learning_rate": 1.2941989583764547e-05, "loss": 1.0147, "step": 23488 }, { "epoch": 0.8411911114294411, "grad_norm": 1.2284082174301147, "learning_rate": 1.2936283145243222e-05, "loss": 1.2718, "step": 23489 }, { "epoch": 0.8412269235590094, "grad_norm": 1.7486215829849243, "learning_rate": 1.2930577878051887e-05, "loss": 1.5571, "step": 23490 }, { "epoch": 0.8412627356885777, "grad_norm": 1.4280521869659424, "learning_rate": 1.2924873782267322e-05, "loss": 1.5505, "step": 23491 }, { "epoch": 0.8412985478181461, "grad_norm": 1.516255259513855, "learning_rate": 1.2919170857966223e-05, "loss": 1.2592, "step": 23492 }, { "epoch": 0.8413343599477143, "grad_norm": 1.6720364093780518, "learning_rate": 1.2913469105225407e-05, "loss": 1.5911, "step": 23493 }, { "epoch": 0.8413701720772826, "grad_norm": 1.5660908222198486, "learning_rate": 1.29077685241215e-05, "loss": 1.3434, "step": 23494 }, { "epoch": 0.8414059842068509, "grad_norm": 1.516623616218567, "learning_rate": 1.290206911473123e-05, "loss": 1.3898, "step": 23495 }, { "epoch": 0.8414417963364191, "grad_norm": 1.6085141897201538, "learning_rate": 1.2896370877131293e-05, "loss": 1.5737, "step": 23496 }, { "epoch": 0.8414776084659874, "grad_norm": 1.5538430213928223, "learning_rate": 1.2890673811398301e-05, "loss": 1.4526, "step": 23497 }, { "epoch": 0.8415134205955557, "grad_norm": 1.6788676977157593, "learning_rate": 1.2884977917608964e-05, "loss": 1.6376, "step": 23498 }, { "epoch": 0.841549232725124, "grad_norm": 1.8180345296859741, "learning_rate": 1.287928319583983e-05, "loss": 1.3496, "step": 23499 }, { "epoch": 0.8415850448546923, "grad_norm": 1.3776708841323853, "learning_rate": 1.2873589646167605e-05, "loss": 1.3057, "step": 23500 }, { "epoch": 0.8416208569842606, "grad_norm": 2.0103936195373535, "learning_rate": 1.2867897268668826e-05, "loss": 1.289, "step": 23501 }, { "epoch": 0.8416566691138289, "grad_norm": 2.1895813941955566, "learning_rate": 1.2862206063420113e-05, "loss": 1.3973, "step": 23502 }, { "epoch": 0.8416924812433971, "grad_norm": 1.5710906982421875, "learning_rate": 1.2856516030497979e-05, "loss": 1.3817, "step": 23503 }, { "epoch": 0.8417282933729654, "grad_norm": 1.7813724279403687, "learning_rate": 1.2850827169979063e-05, "loss": 1.259, "step": 23504 }, { "epoch": 0.8417641055025337, "grad_norm": 1.3750540018081665, "learning_rate": 1.284513948193985e-05, "loss": 1.3579, "step": 23505 }, { "epoch": 0.841799917632102, "grad_norm": 1.7597627639770508, "learning_rate": 1.2839452966456822e-05, "loss": 1.2448, "step": 23506 }, { "epoch": 0.8418357297616703, "grad_norm": 1.2781704664230347, "learning_rate": 1.2833767623606563e-05, "loss": 1.4802, "step": 23507 }, { "epoch": 0.8418715418912386, "grad_norm": 1.4671543836593628, "learning_rate": 1.28280834534655e-05, "loss": 1.1333, "step": 23508 }, { "epoch": 0.8419073540208069, "grad_norm": 1.8261942863464355, "learning_rate": 1.2822400456110162e-05, "loss": 1.3526, "step": 23509 }, { "epoch": 0.8419431661503751, "grad_norm": 1.9040741920471191, "learning_rate": 1.281671863161693e-05, "loss": 1.2746, "step": 23510 }, { "epoch": 0.8419789782799434, "grad_norm": 1.4849711656570435, "learning_rate": 1.2811037980062324e-05, "loss": 1.4179, "step": 23511 }, { "epoch": 0.8420147904095117, "grad_norm": 1.4805055856704712, "learning_rate": 1.2805358501522724e-05, "loss": 1.4121, "step": 23512 }, { "epoch": 0.84205060253908, "grad_norm": 1.6600576639175415, "learning_rate": 1.279968019607457e-05, "loss": 1.4056, "step": 23513 }, { "epoch": 0.8420864146686483, "grad_norm": 1.296726107597351, "learning_rate": 1.2794003063794225e-05, "loss": 1.6649, "step": 23514 }, { "epoch": 0.8421222267982166, "grad_norm": 1.372146725654602, "learning_rate": 1.2788327104758068e-05, "loss": 1.5393, "step": 23515 }, { "epoch": 0.8421580389277848, "grad_norm": 1.7383742332458496, "learning_rate": 1.278265231904251e-05, "loss": 1.5947, "step": 23516 }, { "epoch": 0.8421938510573531, "grad_norm": 1.5140732526779175, "learning_rate": 1.277697870672383e-05, "loss": 1.3892, "step": 23517 }, { "epoch": 0.8422296631869214, "grad_norm": 1.4669893980026245, "learning_rate": 1.2771306267878392e-05, "loss": 1.2139, "step": 23518 }, { "epoch": 0.8422654753164897, "grad_norm": 2.693443536758423, "learning_rate": 1.2765635002582521e-05, "loss": 1.362, "step": 23519 }, { "epoch": 0.842301287446058, "grad_norm": 1.3767279386520386, "learning_rate": 1.2759964910912524e-05, "loss": 1.2912, "step": 23520 }, { "epoch": 0.8423370995756263, "grad_norm": 1.9460740089416504, "learning_rate": 1.275429599294462e-05, "loss": 1.6095, "step": 23521 }, { "epoch": 0.8423729117051946, "grad_norm": 1.8773564100265503, "learning_rate": 1.2748628248755167e-05, "loss": 1.5899, "step": 23522 }, { "epoch": 0.8424087238347628, "grad_norm": 1.7842673063278198, "learning_rate": 1.2742961678420385e-05, "loss": 1.4446, "step": 23523 }, { "epoch": 0.8424445359643311, "grad_norm": 1.3329923152923584, "learning_rate": 1.2737296282016464e-05, "loss": 1.4283, "step": 23524 }, { "epoch": 0.8424803480938994, "grad_norm": 1.8569622039794922, "learning_rate": 1.2731632059619669e-05, "loss": 1.3498, "step": 23525 }, { "epoch": 0.8425161602234676, "grad_norm": 2.4255120754241943, "learning_rate": 1.2725969011306204e-05, "loss": 1.6669, "step": 23526 }, { "epoch": 0.842551972353036, "grad_norm": 1.7957884073257446, "learning_rate": 1.2720307137152266e-05, "loss": 1.6592, "step": 23527 }, { "epoch": 0.8425877844826043, "grad_norm": 1.8802419900894165, "learning_rate": 1.271464643723399e-05, "loss": 1.3524, "step": 23528 }, { "epoch": 0.8426235966121726, "grad_norm": 1.8517471551895142, "learning_rate": 1.2708986911627551e-05, "loss": 1.5125, "step": 23529 }, { "epoch": 0.8426594087417408, "grad_norm": 1.721115231513977, "learning_rate": 1.27033285604091e-05, "loss": 1.3382, "step": 23530 }, { "epoch": 0.8426952208713091, "grad_norm": 1.90151846408844, "learning_rate": 1.2697671383654786e-05, "loss": 1.58, "step": 23531 }, { "epoch": 0.8427310330008774, "grad_norm": 1.4302324056625366, "learning_rate": 1.2692015381440658e-05, "loss": 1.1196, "step": 23532 }, { "epoch": 0.8427668451304456, "grad_norm": 1.452170491218567, "learning_rate": 1.2686360553842857e-05, "loss": 1.4946, "step": 23533 }, { "epoch": 0.842802657260014, "grad_norm": 1.3650285005569458, "learning_rate": 1.2680706900937455e-05, "loss": 1.3334, "step": 23534 }, { "epoch": 0.8428384693895823, "grad_norm": 1.7403556108474731, "learning_rate": 1.2675054422800503e-05, "loss": 1.6425, "step": 23535 }, { "epoch": 0.8428742815191506, "grad_norm": 1.6567943096160889, "learning_rate": 1.2669403119508039e-05, "loss": 1.5255, "step": 23536 }, { "epoch": 0.8429100936487188, "grad_norm": 1.5474529266357422, "learning_rate": 1.2663752991136112e-05, "loss": 1.6668, "step": 23537 }, { "epoch": 0.8429459057782871, "grad_norm": 1.6595933437347412, "learning_rate": 1.2658104037760753e-05, "loss": 1.3727, "step": 23538 }, { "epoch": 0.8429817179078554, "grad_norm": 1.8614715337753296, "learning_rate": 1.2652456259457924e-05, "loss": 1.4968, "step": 23539 }, { "epoch": 0.8430175300374236, "grad_norm": 2.24442720413208, "learning_rate": 1.2646809656303627e-05, "loss": 1.5371, "step": 23540 }, { "epoch": 0.843053342166992, "grad_norm": 1.914910912513733, "learning_rate": 1.2641164228373847e-05, "loss": 1.4012, "step": 23541 }, { "epoch": 0.8430891542965603, "grad_norm": 3.1867616176605225, "learning_rate": 1.2635519975744503e-05, "loss": 1.6351, "step": 23542 }, { "epoch": 0.8431249664261286, "grad_norm": 1.5233803987503052, "learning_rate": 1.2629876898491532e-05, "loss": 1.4552, "step": 23543 }, { "epoch": 0.8431607785556968, "grad_norm": 1.6941332817077637, "learning_rate": 1.2624234996690875e-05, "loss": 1.233, "step": 23544 }, { "epoch": 0.8431965906852651, "grad_norm": 1.8111356496810913, "learning_rate": 1.2618594270418448e-05, "loss": 1.1046, "step": 23545 }, { "epoch": 0.8432324028148334, "grad_norm": 1.4550955295562744, "learning_rate": 1.2612954719750103e-05, "loss": 1.3761, "step": 23546 }, { "epoch": 0.8432682149444016, "grad_norm": 1.7182893753051758, "learning_rate": 1.2607316344761733e-05, "loss": 1.261, "step": 23547 }, { "epoch": 0.84330402707397, "grad_norm": 1.9735833406448364, "learning_rate": 1.2601679145529189e-05, "loss": 1.2702, "step": 23548 }, { "epoch": 0.8433398392035383, "grad_norm": 1.8338557481765747, "learning_rate": 1.2596043122128343e-05, "loss": 1.2931, "step": 23549 }, { "epoch": 0.8433756513331065, "grad_norm": 2.4479641914367676, "learning_rate": 1.2590408274634969e-05, "loss": 1.2256, "step": 23550 }, { "epoch": 0.8434114634626748, "grad_norm": 1.5408915281295776, "learning_rate": 1.2584774603124905e-05, "loss": 1.352, "step": 23551 }, { "epoch": 0.8434472755922431, "grad_norm": 1.7759418487548828, "learning_rate": 1.2579142107673959e-05, "loss": 1.282, "step": 23552 }, { "epoch": 0.8434830877218114, "grad_norm": 1.3212677240371704, "learning_rate": 1.2573510788357867e-05, "loss": 1.3845, "step": 23553 }, { "epoch": 0.8435188998513796, "grad_norm": 2.116630792617798, "learning_rate": 1.2567880645252417e-05, "loss": 1.23, "step": 23554 }, { "epoch": 0.843554711980948, "grad_norm": 2.0516796112060547, "learning_rate": 1.2562251678433356e-05, "loss": 1.6818, "step": 23555 }, { "epoch": 0.8435905241105163, "grad_norm": 1.6188548803329468, "learning_rate": 1.2556623887976427e-05, "loss": 1.6243, "step": 23556 }, { "epoch": 0.8436263362400845, "grad_norm": 1.391708254814148, "learning_rate": 1.255099727395732e-05, "loss": 1.2512, "step": 23557 }, { "epoch": 0.8436621483696528, "grad_norm": 1.5969374179840088, "learning_rate": 1.2545371836451736e-05, "loss": 1.2388, "step": 23558 }, { "epoch": 0.8436979604992211, "grad_norm": 1.3789353370666504, "learning_rate": 1.2539747575535387e-05, "loss": 1.4325, "step": 23559 }, { "epoch": 0.8437337726287893, "grad_norm": 2.3373169898986816, "learning_rate": 1.2534124491283893e-05, "loss": 1.6212, "step": 23560 }, { "epoch": 0.8437695847583576, "grad_norm": 2.5310919284820557, "learning_rate": 1.2528502583772938e-05, "loss": 1.6208, "step": 23561 }, { "epoch": 0.843805396887926, "grad_norm": 1.8715249300003052, "learning_rate": 1.252288185307815e-05, "loss": 1.5006, "step": 23562 }, { "epoch": 0.8438412090174943, "grad_norm": 1.8814998865127563, "learning_rate": 1.2517262299275167e-05, "loss": 1.52, "step": 23563 }, { "epoch": 0.8438770211470625, "grad_norm": 1.4319605827331543, "learning_rate": 1.2511643922439564e-05, "loss": 1.4662, "step": 23564 }, { "epoch": 0.8439128332766308, "grad_norm": 1.735001564025879, "learning_rate": 1.2506026722646924e-05, "loss": 0.974, "step": 23565 }, { "epoch": 0.8439486454061991, "grad_norm": 1.460253119468689, "learning_rate": 1.2500410699972853e-05, "loss": 1.0201, "step": 23566 }, { "epoch": 0.8439844575357673, "grad_norm": 2.5098659992218018, "learning_rate": 1.2494795854492903e-05, "loss": 1.7105, "step": 23567 }, { "epoch": 0.8440202696653356, "grad_norm": 1.4424971342086792, "learning_rate": 1.2489182186282577e-05, "loss": 1.2676, "step": 23568 }, { "epoch": 0.8440560817949039, "grad_norm": 1.512639045715332, "learning_rate": 1.2483569695417418e-05, "loss": 1.4629, "step": 23569 }, { "epoch": 0.8440918939244723, "grad_norm": 1.5117937326431274, "learning_rate": 1.2477958381972977e-05, "loss": 1.133, "step": 23570 }, { "epoch": 0.8441277060540405, "grad_norm": 1.7218340635299683, "learning_rate": 1.2472348246024679e-05, "loss": 1.2329, "step": 23571 }, { "epoch": 0.8441635181836088, "grad_norm": 1.6333949565887451, "learning_rate": 1.2466739287648032e-05, "loss": 1.5322, "step": 23572 }, { "epoch": 0.8441993303131771, "grad_norm": 1.4452733993530273, "learning_rate": 1.24611315069185e-05, "loss": 1.3076, "step": 23573 }, { "epoch": 0.8442351424427453, "grad_norm": 1.46990168094635, "learning_rate": 1.2455524903911552e-05, "loss": 1.3682, "step": 23574 }, { "epoch": 0.8442709545723136, "grad_norm": 2.660382032394409, "learning_rate": 1.2449919478702587e-05, "loss": 1.623, "step": 23575 }, { "epoch": 0.8443067667018819, "grad_norm": 2.3135154247283936, "learning_rate": 1.2444315231366988e-05, "loss": 1.5553, "step": 23576 }, { "epoch": 0.8443425788314503, "grad_norm": 1.4820939302444458, "learning_rate": 1.2438712161980226e-05, "loss": 1.5055, "step": 23577 }, { "epoch": 0.8443783909610185, "grad_norm": 1.7251002788543701, "learning_rate": 1.2433110270617632e-05, "loss": 1.7433, "step": 23578 }, { "epoch": 0.8444142030905868, "grad_norm": 1.5508625507354736, "learning_rate": 1.2427509557354578e-05, "loss": 1.2144, "step": 23579 }, { "epoch": 0.8444500152201551, "grad_norm": 1.7173960208892822, "learning_rate": 1.2421910022266425e-05, "loss": 1.3851, "step": 23580 }, { "epoch": 0.8444858273497233, "grad_norm": 1.360556960105896, "learning_rate": 1.2416311665428526e-05, "loss": 1.4602, "step": 23581 }, { "epoch": 0.8445216394792916, "grad_norm": 1.280659794807434, "learning_rate": 1.2410714486916164e-05, "loss": 1.4802, "step": 23582 }, { "epoch": 0.8445574516088599, "grad_norm": 1.5254395008087158, "learning_rate": 1.2405118486804646e-05, "loss": 1.4026, "step": 23583 }, { "epoch": 0.8445932637384282, "grad_norm": 1.5753604173660278, "learning_rate": 1.2399523665169298e-05, "loss": 1.353, "step": 23584 }, { "epoch": 0.8446290758679965, "grad_norm": 3.215534210205078, "learning_rate": 1.239393002208533e-05, "loss": 1.3249, "step": 23585 }, { "epoch": 0.8446648879975648, "grad_norm": 1.6247320175170898, "learning_rate": 1.238833755762806e-05, "loss": 1.4145, "step": 23586 }, { "epoch": 0.844700700127133, "grad_norm": 2.186195135116577, "learning_rate": 1.2382746271872658e-05, "loss": 1.7662, "step": 23587 }, { "epoch": 0.8447365122567013, "grad_norm": 1.3721939325332642, "learning_rate": 1.2377156164894422e-05, "loss": 1.0364, "step": 23588 }, { "epoch": 0.8447723243862696, "grad_norm": 1.7674872875213623, "learning_rate": 1.2371567236768511e-05, "loss": 1.2248, "step": 23589 }, { "epoch": 0.8448081365158379, "grad_norm": 1.807494044303894, "learning_rate": 1.2365979487570122e-05, "loss": 1.3406, "step": 23590 }, { "epoch": 0.8448439486454062, "grad_norm": 3.335813522338867, "learning_rate": 1.2360392917374442e-05, "loss": 1.4115, "step": 23591 }, { "epoch": 0.8448797607749745, "grad_norm": 1.336086392402649, "learning_rate": 1.235480752625665e-05, "loss": 1.582, "step": 23592 }, { "epoch": 0.8449155729045428, "grad_norm": 2.0651471614837646, "learning_rate": 1.234922331429188e-05, "loss": 1.2444, "step": 23593 }, { "epoch": 0.844951385034111, "grad_norm": 1.5631545782089233, "learning_rate": 1.2343640281555191e-05, "loss": 1.1421, "step": 23594 }, { "epoch": 0.8449871971636793, "grad_norm": 2.9039080142974854, "learning_rate": 1.2338058428121802e-05, "loss": 1.5782, "step": 23595 }, { "epoch": 0.8450230092932476, "grad_norm": 1.8071566820144653, "learning_rate": 1.233247775406674e-05, "loss": 1.4459, "step": 23596 }, { "epoch": 0.8450588214228159, "grad_norm": 1.7052816152572632, "learning_rate": 1.2326898259465125e-05, "loss": 1.6382, "step": 23597 }, { "epoch": 0.8450946335523842, "grad_norm": 1.8090574741363525, "learning_rate": 1.2321319944391963e-05, "loss": 1.3878, "step": 23598 }, { "epoch": 0.8451304456819525, "grad_norm": 1.5731558799743652, "learning_rate": 1.2315742808922382e-05, "loss": 1.2031, "step": 23599 }, { "epoch": 0.8451662578115208, "grad_norm": 1.4847115278244019, "learning_rate": 1.2310166853131366e-05, "loss": 1.4141, "step": 23600 }, { "epoch": 0.845202069941089, "grad_norm": 1.2769142389297485, "learning_rate": 1.2304592077093958e-05, "loss": 1.3135, "step": 23601 }, { "epoch": 0.8452378820706573, "grad_norm": 1.4810028076171875, "learning_rate": 1.2299018480885117e-05, "loss": 1.0614, "step": 23602 }, { "epoch": 0.8452736942002256, "grad_norm": 1.6237541437149048, "learning_rate": 1.2293446064579873e-05, "loss": 1.2855, "step": 23603 }, { "epoch": 0.8453095063297938, "grad_norm": 1.9380799531936646, "learning_rate": 1.2287874828253187e-05, "loss": 1.5237, "step": 23604 }, { "epoch": 0.8453453184593622, "grad_norm": 2.556157350540161, "learning_rate": 1.2282304771979958e-05, "loss": 1.4906, "step": 23605 }, { "epoch": 0.8453811305889305, "grad_norm": 2.639732837677002, "learning_rate": 1.2276735895835223e-05, "loss": 1.5245, "step": 23606 }, { "epoch": 0.8454169427184988, "grad_norm": 1.5100208520889282, "learning_rate": 1.2271168199893834e-05, "loss": 1.6371, "step": 23607 }, { "epoch": 0.845452754848067, "grad_norm": 1.5558120012283325, "learning_rate": 1.2265601684230732e-05, "loss": 1.3295, "step": 23608 }, { "epoch": 0.8454885669776353, "grad_norm": 1.4905898571014404, "learning_rate": 1.2260036348920745e-05, "loss": 1.6187, "step": 23609 }, { "epoch": 0.8455243791072036, "grad_norm": 1.5503627061843872, "learning_rate": 1.2254472194038835e-05, "loss": 1.4804, "step": 23610 }, { "epoch": 0.8455601912367718, "grad_norm": 1.5962167978286743, "learning_rate": 1.224890921965981e-05, "loss": 1.4638, "step": 23611 }, { "epoch": 0.8455960033663402, "grad_norm": 1.7058485746383667, "learning_rate": 1.2243347425858508e-05, "loss": 1.4108, "step": 23612 }, { "epoch": 0.8456318154959085, "grad_norm": 2.0142135620117188, "learning_rate": 1.2237786812709773e-05, "loss": 1.6224, "step": 23613 }, { "epoch": 0.8456676276254768, "grad_norm": 1.3440698385238647, "learning_rate": 1.2232227380288408e-05, "loss": 1.5021, "step": 23614 }, { "epoch": 0.845703439755045, "grad_norm": 1.5169098377227783, "learning_rate": 1.2226669128669232e-05, "loss": 1.3667, "step": 23615 }, { "epoch": 0.8457392518846133, "grad_norm": 2.082235813140869, "learning_rate": 1.2221112057926954e-05, "loss": 1.5047, "step": 23616 }, { "epoch": 0.8457750640141816, "grad_norm": 1.3239139318466187, "learning_rate": 1.2215556168136443e-05, "loss": 1.5601, "step": 23617 }, { "epoch": 0.8458108761437498, "grad_norm": 1.6118295192718506, "learning_rate": 1.2210001459372355e-05, "loss": 1.5155, "step": 23618 }, { "epoch": 0.8458466882733182, "grad_norm": 1.7186284065246582, "learning_rate": 1.2204447931709484e-05, "loss": 1.6944, "step": 23619 }, { "epoch": 0.8458825004028865, "grad_norm": 1.6014262437820435, "learning_rate": 1.2198895585222503e-05, "loss": 1.5028, "step": 23620 }, { "epoch": 0.8459183125324548, "grad_norm": 1.8666081428527832, "learning_rate": 1.219334441998612e-05, "loss": 1.4784, "step": 23621 }, { "epoch": 0.845954124662023, "grad_norm": 1.4160361289978027, "learning_rate": 1.2187794436075039e-05, "loss": 1.0623, "step": 23622 }, { "epoch": 0.8459899367915913, "grad_norm": 1.6083838939666748, "learning_rate": 1.2182245633563905e-05, "loss": 1.2759, "step": 23623 }, { "epoch": 0.8460257489211596, "grad_norm": 1.7315397262573242, "learning_rate": 1.2176698012527376e-05, "loss": 1.3641, "step": 23624 }, { "epoch": 0.8460615610507278, "grad_norm": 1.6413108110427856, "learning_rate": 1.2171151573040085e-05, "loss": 1.5172, "step": 23625 }, { "epoch": 0.8460973731802962, "grad_norm": 2.129379987716675, "learning_rate": 1.2165606315176691e-05, "loss": 1.7346, "step": 23626 }, { "epoch": 0.8461331853098645, "grad_norm": 1.699070930480957, "learning_rate": 1.2160062239011739e-05, "loss": 1.3449, "step": 23627 }, { "epoch": 0.8461689974394327, "grad_norm": 1.3669846057891846, "learning_rate": 1.2154519344619841e-05, "loss": 1.5008, "step": 23628 }, { "epoch": 0.846204809569001, "grad_norm": 1.5487329959869385, "learning_rate": 1.2148977632075598e-05, "loss": 1.569, "step": 23629 }, { "epoch": 0.8462406216985693, "grad_norm": 2.4239537715911865, "learning_rate": 1.2143437101453514e-05, "loss": 1.4715, "step": 23630 }, { "epoch": 0.8462764338281376, "grad_norm": 1.408677101135254, "learning_rate": 1.2137897752828165e-05, "loss": 1.3176, "step": 23631 }, { "epoch": 0.8463122459577058, "grad_norm": 1.3192760944366455, "learning_rate": 1.2132359586274067e-05, "loss": 1.4138, "step": 23632 }, { "epoch": 0.8463480580872742, "grad_norm": 1.8194007873535156, "learning_rate": 1.212682260186575e-05, "loss": 1.7193, "step": 23633 }, { "epoch": 0.8463838702168425, "grad_norm": 1.4878827333450317, "learning_rate": 1.2121286799677667e-05, "loss": 0.9791, "step": 23634 }, { "epoch": 0.8464196823464107, "grad_norm": 1.69992995262146, "learning_rate": 1.2115752179784312e-05, "loss": 1.4171, "step": 23635 }, { "epoch": 0.846455494475979, "grad_norm": 1.8349891901016235, "learning_rate": 1.211021874226015e-05, "loss": 1.8357, "step": 23636 }, { "epoch": 0.8464913066055473, "grad_norm": 1.3100531101226807, "learning_rate": 1.2104686487179639e-05, "loss": 1.4282, "step": 23637 }, { "epoch": 0.8465271187351155, "grad_norm": 1.4935340881347656, "learning_rate": 1.209915541461718e-05, "loss": 1.4017, "step": 23638 }, { "epoch": 0.8465629308646838, "grad_norm": 1.7822998762130737, "learning_rate": 1.20936255246472e-05, "loss": 1.1659, "step": 23639 }, { "epoch": 0.8465987429942522, "grad_norm": 1.7286075353622437, "learning_rate": 1.2088096817344118e-05, "loss": 1.8532, "step": 23640 }, { "epoch": 0.8466345551238205, "grad_norm": 1.912163496017456, "learning_rate": 1.2082569292782275e-05, "loss": 1.4164, "step": 23641 }, { "epoch": 0.8466703672533887, "grad_norm": 1.4342498779296875, "learning_rate": 1.2077042951036055e-05, "loss": 1.1178, "step": 23642 }, { "epoch": 0.846706179382957, "grad_norm": 2.4091644287109375, "learning_rate": 1.207151779217981e-05, "loss": 1.2119, "step": 23643 }, { "epoch": 0.8467419915125253, "grad_norm": 1.5198973417282104, "learning_rate": 1.2065993816287901e-05, "loss": 1.1991, "step": 23644 }, { "epoch": 0.8467778036420935, "grad_norm": 1.4124888181686401, "learning_rate": 1.2060471023434594e-05, "loss": 1.6583, "step": 23645 }, { "epoch": 0.8468136157716618, "grad_norm": 1.6898411512374878, "learning_rate": 1.2054949413694216e-05, "loss": 1.2365, "step": 23646 }, { "epoch": 0.8468494279012302, "grad_norm": 1.4667853116989136, "learning_rate": 1.2049428987141065e-05, "loss": 1.5452, "step": 23647 }, { "epoch": 0.8468852400307985, "grad_norm": 1.607649326324463, "learning_rate": 1.204390974384939e-05, "loss": 1.347, "step": 23648 }, { "epoch": 0.8469210521603667, "grad_norm": 1.8424022197723389, "learning_rate": 1.2038391683893446e-05, "loss": 1.7185, "step": 23649 }, { "epoch": 0.846956864289935, "grad_norm": 1.5463377237319946, "learning_rate": 1.2032874807347484e-05, "loss": 1.5278, "step": 23650 }, { "epoch": 0.8469926764195033, "grad_norm": 1.372738242149353, "learning_rate": 1.2027359114285741e-05, "loss": 1.4912, "step": 23651 }, { "epoch": 0.8470284885490715, "grad_norm": 1.7172951698303223, "learning_rate": 1.2021844604782384e-05, "loss": 1.6273, "step": 23652 }, { "epoch": 0.8470643006786398, "grad_norm": 1.8303214311599731, "learning_rate": 1.2016331278911619e-05, "loss": 1.2777, "step": 23653 }, { "epoch": 0.8471001128082082, "grad_norm": 1.78458833694458, "learning_rate": 1.201081913674763e-05, "loss": 1.6966, "step": 23654 }, { "epoch": 0.8471359249377765, "grad_norm": 1.1796941757202148, "learning_rate": 1.2005308178364593e-05, "loss": 1.457, "step": 23655 }, { "epoch": 0.8471717370673447, "grad_norm": 1.6931653022766113, "learning_rate": 1.1999798403836615e-05, "loss": 1.0998, "step": 23656 }, { "epoch": 0.847207549196913, "grad_norm": 1.5502809286117554, "learning_rate": 1.1994289813237835e-05, "loss": 1.3704, "step": 23657 }, { "epoch": 0.8472433613264813, "grad_norm": 1.3517582416534424, "learning_rate": 1.1988782406642385e-05, "loss": 1.149, "step": 23658 }, { "epoch": 0.8472791734560495, "grad_norm": 1.7601218223571777, "learning_rate": 1.1983276184124314e-05, "loss": 1.4605, "step": 23659 }, { "epoch": 0.8473149855856178, "grad_norm": 1.4631924629211426, "learning_rate": 1.1977771145757733e-05, "loss": 1.5351, "step": 23660 }, { "epoch": 0.8473507977151862, "grad_norm": 1.9184998273849487, "learning_rate": 1.1972267291616702e-05, "loss": 1.5686, "step": 23661 }, { "epoch": 0.8473866098447544, "grad_norm": 1.5700645446777344, "learning_rate": 1.1966764621775284e-05, "loss": 1.1868, "step": 23662 }, { "epoch": 0.8474224219743227, "grad_norm": 1.7700568437576294, "learning_rate": 1.1961263136307477e-05, "loss": 1.2635, "step": 23663 }, { "epoch": 0.847458234103891, "grad_norm": 1.7117582559585571, "learning_rate": 1.195576283528731e-05, "loss": 1.5889, "step": 23664 }, { "epoch": 0.8474940462334593, "grad_norm": 1.486767053604126, "learning_rate": 1.1950263718788812e-05, "loss": 1.6332, "step": 23665 }, { "epoch": 0.8475298583630275, "grad_norm": 1.6834611892700195, "learning_rate": 1.1944765786885914e-05, "loss": 1.4774, "step": 23666 }, { "epoch": 0.8475656704925958, "grad_norm": 1.8783506155014038, "learning_rate": 1.1939269039652612e-05, "loss": 1.5829, "step": 23667 }, { "epoch": 0.8476014826221642, "grad_norm": 1.8544740676879883, "learning_rate": 1.1933773477162847e-05, "loss": 1.6606, "step": 23668 }, { "epoch": 0.8476372947517324, "grad_norm": 1.5424895286560059, "learning_rate": 1.192827909949059e-05, "loss": 1.3291, "step": 23669 }, { "epoch": 0.8476731068813007, "grad_norm": 1.9440540075302124, "learning_rate": 1.1922785906709711e-05, "loss": 1.461, "step": 23670 }, { "epoch": 0.847708919010869, "grad_norm": 2.229961395263672, "learning_rate": 1.1917293898894145e-05, "loss": 1.4525, "step": 23671 }, { "epoch": 0.8477447311404372, "grad_norm": 1.5149637460708618, "learning_rate": 1.1911803076117777e-05, "loss": 1.3611, "step": 23672 }, { "epoch": 0.8477805432700055, "grad_norm": 1.4159404039382935, "learning_rate": 1.1906313438454464e-05, "loss": 1.444, "step": 23673 }, { "epoch": 0.8478163553995738, "grad_norm": 1.359679937362671, "learning_rate": 1.1900824985978066e-05, "loss": 1.1712, "step": 23674 }, { "epoch": 0.8478521675291422, "grad_norm": 1.24271821975708, "learning_rate": 1.1895337718762422e-05, "loss": 1.493, "step": 23675 }, { "epoch": 0.8478879796587104, "grad_norm": 1.549537181854248, "learning_rate": 1.1889851636881388e-05, "loss": 1.3426, "step": 23676 }, { "epoch": 0.8479237917882787, "grad_norm": 1.8525686264038086, "learning_rate": 1.1884366740408726e-05, "loss": 1.4973, "step": 23677 }, { "epoch": 0.847959603917847, "grad_norm": 1.6255896091461182, "learning_rate": 1.1878883029418253e-05, "loss": 1.6222, "step": 23678 }, { "epoch": 0.8479954160474152, "grad_norm": 1.3341083526611328, "learning_rate": 1.1873400503983733e-05, "loss": 1.4786, "step": 23679 }, { "epoch": 0.8480312281769835, "grad_norm": 1.8212075233459473, "learning_rate": 1.1867919164178964e-05, "loss": 1.4576, "step": 23680 }, { "epoch": 0.8480670403065518, "grad_norm": 1.6231342554092407, "learning_rate": 1.1862439010077653e-05, "loss": 1.5524, "step": 23681 }, { "epoch": 0.8481028524361202, "grad_norm": 1.5469783544540405, "learning_rate": 1.1856960041753495e-05, "loss": 1.3759, "step": 23682 }, { "epoch": 0.8481386645656884, "grad_norm": 1.431070327758789, "learning_rate": 1.185148225928029e-05, "loss": 1.7274, "step": 23683 }, { "epoch": 0.8481744766952567, "grad_norm": 1.7324550151824951, "learning_rate": 1.1846005662731663e-05, "loss": 1.6348, "step": 23684 }, { "epoch": 0.848210288824825, "grad_norm": 1.7936501502990723, "learning_rate": 1.1840530252181336e-05, "loss": 1.6526, "step": 23685 }, { "epoch": 0.8482461009543932, "grad_norm": 1.3822287321090698, "learning_rate": 1.1835056027702918e-05, "loss": 1.3794, "step": 23686 }, { "epoch": 0.8482819130839615, "grad_norm": 1.904231071472168, "learning_rate": 1.1829582989370148e-05, "loss": 1.6432, "step": 23687 }, { "epoch": 0.8483177252135298, "grad_norm": 2.0496718883514404, "learning_rate": 1.1824111137256577e-05, "loss": 1.3671, "step": 23688 }, { "epoch": 0.8483535373430982, "grad_norm": 2.090599298477173, "learning_rate": 1.1818640471435848e-05, "loss": 1.6222, "step": 23689 }, { "epoch": 0.8483893494726664, "grad_norm": 1.9762262105941772, "learning_rate": 1.1813170991981593e-05, "loss": 1.5517, "step": 23690 }, { "epoch": 0.8484251616022347, "grad_norm": 1.5518031120300293, "learning_rate": 1.1807702698967349e-05, "loss": 1.4667, "step": 23691 }, { "epoch": 0.848460973731803, "grad_norm": 1.2942707538604736, "learning_rate": 1.1802235592466727e-05, "loss": 1.093, "step": 23692 }, { "epoch": 0.8484967858613712, "grad_norm": 2.022648334503174, "learning_rate": 1.179676967255321e-05, "loss": 1.55, "step": 23693 }, { "epoch": 0.8485325979909395, "grad_norm": 1.2496730089187622, "learning_rate": 1.1791304939300429e-05, "loss": 0.9355, "step": 23694 }, { "epoch": 0.8485684101205078, "grad_norm": 1.1895911693572998, "learning_rate": 1.1785841392781838e-05, "loss": 1.1347, "step": 23695 }, { "epoch": 0.8486042222500761, "grad_norm": 1.413276195526123, "learning_rate": 1.1780379033070988e-05, "loss": 1.4609, "step": 23696 }, { "epoch": 0.8486400343796444, "grad_norm": 1.4332022666931152, "learning_rate": 1.1774917860241297e-05, "loss": 1.478, "step": 23697 }, { "epoch": 0.8486758465092127, "grad_norm": 1.54298734664917, "learning_rate": 1.1769457874366318e-05, "loss": 1.2612, "step": 23698 }, { "epoch": 0.848711658638781, "grad_norm": 1.376090168952942, "learning_rate": 1.1763999075519482e-05, "loss": 1.2319, "step": 23699 }, { "epoch": 0.8487474707683492, "grad_norm": 1.784658670425415, "learning_rate": 1.1758541463774186e-05, "loss": 1.53, "step": 23700 }, { "epoch": 0.8487832828979175, "grad_norm": 2.318807363510132, "learning_rate": 1.1753085039203926e-05, "loss": 1.2602, "step": 23701 }, { "epoch": 0.8488190950274858, "grad_norm": 1.3570774793624878, "learning_rate": 1.1747629801882054e-05, "loss": 1.635, "step": 23702 }, { "epoch": 0.8488549071570541, "grad_norm": 2.0229833126068115, "learning_rate": 1.1742175751882012e-05, "loss": 1.5221, "step": 23703 }, { "epoch": 0.8488907192866224, "grad_norm": 1.5566153526306152, "learning_rate": 1.1736722889277107e-05, "loss": 1.4815, "step": 23704 }, { "epoch": 0.8489265314161907, "grad_norm": 2.224531888961792, "learning_rate": 1.1731271214140783e-05, "loss": 1.5376, "step": 23705 }, { "epoch": 0.848962343545759, "grad_norm": 2.213486671447754, "learning_rate": 1.1725820726546322e-05, "loss": 1.5942, "step": 23706 }, { "epoch": 0.8489981556753272, "grad_norm": 1.877007246017456, "learning_rate": 1.1720371426567111e-05, "loss": 1.4773, "step": 23707 }, { "epoch": 0.8490339678048955, "grad_norm": 1.609832763671875, "learning_rate": 1.1714923314276405e-05, "loss": 1.2998, "step": 23708 }, { "epoch": 0.8490697799344638, "grad_norm": 1.7458049058914185, "learning_rate": 1.170947638974752e-05, "loss": 1.4685, "step": 23709 }, { "epoch": 0.8491055920640321, "grad_norm": 2.7425076961517334, "learning_rate": 1.1704030653053766e-05, "loss": 1.5496, "step": 23710 }, { "epoch": 0.8491414041936004, "grad_norm": 1.5261955261230469, "learning_rate": 1.1698586104268372e-05, "loss": 1.4367, "step": 23711 }, { "epoch": 0.8491772163231687, "grad_norm": 1.8349772691726685, "learning_rate": 1.169314274346459e-05, "loss": 1.3865, "step": 23712 }, { "epoch": 0.8492130284527369, "grad_norm": 1.9247404336929321, "learning_rate": 1.1687700570715677e-05, "loss": 1.3468, "step": 23713 }, { "epoch": 0.8492488405823052, "grad_norm": 1.6313281059265137, "learning_rate": 1.1682259586094845e-05, "loss": 1.3691, "step": 23714 }, { "epoch": 0.8492846527118735, "grad_norm": 1.6033432483673096, "learning_rate": 1.1676819789675264e-05, "loss": 1.3165, "step": 23715 }, { "epoch": 0.8493204648414417, "grad_norm": 1.6103150844573975, "learning_rate": 1.1671381181530171e-05, "loss": 1.4167, "step": 23716 }, { "epoch": 0.8493562769710101, "grad_norm": 1.6567761898040771, "learning_rate": 1.1665943761732712e-05, "loss": 1.4802, "step": 23717 }, { "epoch": 0.8493920891005784, "grad_norm": 1.9496479034423828, "learning_rate": 1.1660507530356024e-05, "loss": 1.4007, "step": 23718 }, { "epoch": 0.8494279012301467, "grad_norm": 1.6476153135299683, "learning_rate": 1.1655072487473251e-05, "loss": 1.3635, "step": 23719 }, { "epoch": 0.8494637133597149, "grad_norm": 1.7189626693725586, "learning_rate": 1.1649638633157523e-05, "loss": 1.6882, "step": 23720 }, { "epoch": 0.8494995254892832, "grad_norm": 1.4892523288726807, "learning_rate": 1.1644205967481959e-05, "loss": 1.2611, "step": 23721 }, { "epoch": 0.8495353376188515, "grad_norm": 1.507788896560669, "learning_rate": 1.1638774490519622e-05, "loss": 1.5566, "step": 23722 }, { "epoch": 0.8495711497484197, "grad_norm": 2.1150612831115723, "learning_rate": 1.1633344202343587e-05, "loss": 1.3369, "step": 23723 }, { "epoch": 0.8496069618779881, "grad_norm": 1.6760443449020386, "learning_rate": 1.162791510302692e-05, "loss": 1.0433, "step": 23724 }, { "epoch": 0.8496427740075564, "grad_norm": 1.6764628887176514, "learning_rate": 1.1622487192642694e-05, "loss": 1.3644, "step": 23725 }, { "epoch": 0.8496785861371247, "grad_norm": 1.971871018409729, "learning_rate": 1.1617060471263875e-05, "loss": 1.2388, "step": 23726 }, { "epoch": 0.8497143982666929, "grad_norm": 2.7978687286376953, "learning_rate": 1.1611634938963512e-05, "loss": 1.2827, "step": 23727 }, { "epoch": 0.8497502103962612, "grad_norm": 2.1130940914154053, "learning_rate": 1.1606210595814593e-05, "loss": 1.4592, "step": 23728 }, { "epoch": 0.8497860225258295, "grad_norm": 1.4283808469772339, "learning_rate": 1.1600787441890082e-05, "loss": 1.4418, "step": 23729 }, { "epoch": 0.8498218346553977, "grad_norm": 1.7132954597473145, "learning_rate": 1.1595365477262944e-05, "loss": 1.5116, "step": 23730 }, { "epoch": 0.8498576467849661, "grad_norm": 1.5262160301208496, "learning_rate": 1.1589944702006129e-05, "loss": 1.1867, "step": 23731 }, { "epoch": 0.8498934589145344, "grad_norm": 2.8375978469848633, "learning_rate": 1.158452511619259e-05, "loss": 1.5645, "step": 23732 }, { "epoch": 0.8499292710441027, "grad_norm": 1.3872442245483398, "learning_rate": 1.1579106719895205e-05, "loss": 1.4656, "step": 23733 }, { "epoch": 0.8499650831736709, "grad_norm": 2.849210262298584, "learning_rate": 1.157368951318687e-05, "loss": 1.7573, "step": 23734 }, { "epoch": 0.8500008953032392, "grad_norm": 1.7254695892333984, "learning_rate": 1.1568273496140513e-05, "loss": 1.4489, "step": 23735 }, { "epoch": 0.8500367074328075, "grad_norm": 1.4236689805984497, "learning_rate": 1.1562858668828936e-05, "loss": 1.6148, "step": 23736 }, { "epoch": 0.8500725195623757, "grad_norm": 1.5997804403305054, "learning_rate": 1.1557445031325032e-05, "loss": 1.3078, "step": 23737 }, { "epoch": 0.8501083316919441, "grad_norm": 2.8871943950653076, "learning_rate": 1.1552032583701612e-05, "loss": 1.547, "step": 23738 }, { "epoch": 0.8501441438215124, "grad_norm": 1.5523663759231567, "learning_rate": 1.1546621326031526e-05, "loss": 1.407, "step": 23739 }, { "epoch": 0.8501799559510806, "grad_norm": 1.4825963973999023, "learning_rate": 1.154121125838754e-05, "loss": 1.4152, "step": 23740 }, { "epoch": 0.8502157680806489, "grad_norm": 1.7809697389602661, "learning_rate": 1.1535802380842453e-05, "loss": 1.2848, "step": 23741 }, { "epoch": 0.8502515802102172, "grad_norm": 1.843872308731079, "learning_rate": 1.1530394693469026e-05, "loss": 1.4454, "step": 23742 }, { "epoch": 0.8502873923397855, "grad_norm": 1.6552728414535522, "learning_rate": 1.1524988196340048e-05, "loss": 1.4238, "step": 23743 }, { "epoch": 0.8503232044693537, "grad_norm": 1.1542280912399292, "learning_rate": 1.1519582889528202e-05, "loss": 1.3864, "step": 23744 }, { "epoch": 0.8503590165989221, "grad_norm": 1.427795648574829, "learning_rate": 1.1514178773106243e-05, "loss": 1.2635, "step": 23745 }, { "epoch": 0.8503948287284904, "grad_norm": 1.981183409690857, "learning_rate": 1.150877584714689e-05, "loss": 1.3967, "step": 23746 }, { "epoch": 0.8504306408580586, "grad_norm": 1.5299640893936157, "learning_rate": 1.1503374111722786e-05, "loss": 1.6025, "step": 23747 }, { "epoch": 0.8504664529876269, "grad_norm": 1.5333250761032104, "learning_rate": 1.149797356690664e-05, "loss": 1.37, "step": 23748 }, { "epoch": 0.8505022651171952, "grad_norm": 1.6742618083953857, "learning_rate": 1.149257421277109e-05, "loss": 1.4453, "step": 23749 }, { "epoch": 0.8505380772467634, "grad_norm": 1.8862006664276123, "learning_rate": 1.1487176049388814e-05, "loss": 1.4248, "step": 23750 }, { "epoch": 0.8505738893763317, "grad_norm": 1.4315221309661865, "learning_rate": 1.1481779076832388e-05, "loss": 1.3406, "step": 23751 }, { "epoch": 0.8506097015059001, "grad_norm": 1.759994387626648, "learning_rate": 1.1476383295174452e-05, "loss": 1.6171, "step": 23752 }, { "epoch": 0.8506455136354684, "grad_norm": 2.4337728023529053, "learning_rate": 1.1470988704487607e-05, "loss": 1.2688, "step": 23753 }, { "epoch": 0.8506813257650366, "grad_norm": 1.9323700666427612, "learning_rate": 1.146559530484439e-05, "loss": 1.4573, "step": 23754 }, { "epoch": 0.8507171378946049, "grad_norm": 1.7567484378814697, "learning_rate": 1.146020309631739e-05, "loss": 1.5855, "step": 23755 }, { "epoch": 0.8507529500241732, "grad_norm": 1.5216503143310547, "learning_rate": 1.145481207897915e-05, "loss": 1.3883, "step": 23756 }, { "epoch": 0.8507887621537414, "grad_norm": 1.536760926246643, "learning_rate": 1.144942225290222e-05, "loss": 1.4571, "step": 23757 }, { "epoch": 0.8508245742833097, "grad_norm": 2.1241402626037598, "learning_rate": 1.1444033618159068e-05, "loss": 1.9264, "step": 23758 }, { "epoch": 0.8508603864128781, "grad_norm": 1.6003999710083008, "learning_rate": 1.143864617482222e-05, "loss": 1.7025, "step": 23759 }, { "epoch": 0.8508961985424464, "grad_norm": 1.6267139911651611, "learning_rate": 1.1433259922964146e-05, "loss": 1.2968, "step": 23760 }, { "epoch": 0.8509320106720146, "grad_norm": 1.788483738899231, "learning_rate": 1.1427874862657339e-05, "loss": 1.4734, "step": 23761 }, { "epoch": 0.8509678228015829, "grad_norm": 1.3906162977218628, "learning_rate": 1.1422490993974199e-05, "loss": 1.0226, "step": 23762 }, { "epoch": 0.8510036349311512, "grad_norm": 2.156611204147339, "learning_rate": 1.1417108316987201e-05, "loss": 1.4735, "step": 23763 }, { "epoch": 0.8510394470607194, "grad_norm": 1.2706553936004639, "learning_rate": 1.1411726831768754e-05, "loss": 1.6045, "step": 23764 }, { "epoch": 0.8510752591902877, "grad_norm": 1.6218516826629639, "learning_rate": 1.1406346538391243e-05, "loss": 1.5444, "step": 23765 }, { "epoch": 0.8511110713198561, "grad_norm": 2.312868118286133, "learning_rate": 1.1400967436927056e-05, "loss": 1.3219, "step": 23766 }, { "epoch": 0.8511468834494244, "grad_norm": 1.7224329710006714, "learning_rate": 1.1395589527448558e-05, "loss": 1.5206, "step": 23767 }, { "epoch": 0.8511826955789926, "grad_norm": 1.7545734643936157, "learning_rate": 1.1390212810028144e-05, "loss": 1.8438, "step": 23768 }, { "epoch": 0.8512185077085609, "grad_norm": 1.76412034034729, "learning_rate": 1.1384837284738114e-05, "loss": 1.1991, "step": 23769 }, { "epoch": 0.8512543198381292, "grad_norm": 1.5692112445831299, "learning_rate": 1.1379462951650755e-05, "loss": 1.4919, "step": 23770 }, { "epoch": 0.8512901319676974, "grad_norm": 1.500417947769165, "learning_rate": 1.137408981083845e-05, "loss": 1.0246, "step": 23771 }, { "epoch": 0.8513259440972657, "grad_norm": 2.5152666568756104, "learning_rate": 1.1368717862373424e-05, "loss": 1.4325, "step": 23772 }, { "epoch": 0.8513617562268341, "grad_norm": 1.9641667604446411, "learning_rate": 1.136334710632797e-05, "loss": 1.3025, "step": 23773 }, { "epoch": 0.8513975683564023, "grad_norm": 1.6455731391906738, "learning_rate": 1.1357977542774356e-05, "loss": 1.2259, "step": 23774 }, { "epoch": 0.8514333804859706, "grad_norm": 2.0032389163970947, "learning_rate": 1.1352609171784834e-05, "loss": 1.2823, "step": 23775 }, { "epoch": 0.8514691926155389, "grad_norm": 1.3516676425933838, "learning_rate": 1.1347241993431578e-05, "loss": 1.4081, "step": 23776 }, { "epoch": 0.8515050047451072, "grad_norm": 2.0282421112060547, "learning_rate": 1.1341876007786845e-05, "loss": 1.5955, "step": 23777 }, { "epoch": 0.8515408168746754, "grad_norm": 2.355489492416382, "learning_rate": 1.1336511214922819e-05, "loss": 1.2484, "step": 23778 }, { "epoch": 0.8515766290042437, "grad_norm": 1.3258121013641357, "learning_rate": 1.1331147614911641e-05, "loss": 1.5587, "step": 23779 }, { "epoch": 0.8516124411338121, "grad_norm": 1.586104154586792, "learning_rate": 1.1325785207825524e-05, "loss": 1.3955, "step": 23780 }, { "epoch": 0.8516482532633803, "grad_norm": 1.611124038696289, "learning_rate": 1.132042399373654e-05, "loss": 1.3186, "step": 23781 }, { "epoch": 0.8516840653929486, "grad_norm": 1.5817452669143677, "learning_rate": 1.131506397271691e-05, "loss": 1.7507, "step": 23782 }, { "epoch": 0.8517198775225169, "grad_norm": 1.5858224630355835, "learning_rate": 1.1309705144838678e-05, "loss": 1.5766, "step": 23783 }, { "epoch": 0.8517556896520851, "grad_norm": 2.0559260845184326, "learning_rate": 1.1304347510173963e-05, "loss": 1.5677, "step": 23784 }, { "epoch": 0.8517915017816534, "grad_norm": 1.479744791984558, "learning_rate": 1.129899106879484e-05, "loss": 1.4073, "step": 23785 }, { "epoch": 0.8518273139112217, "grad_norm": 1.3566659688949585, "learning_rate": 1.1293635820773397e-05, "loss": 1.2683, "step": 23786 }, { "epoch": 0.8518631260407901, "grad_norm": 1.4227968454360962, "learning_rate": 1.1288281766181651e-05, "loss": 1.4598, "step": 23787 }, { "epoch": 0.8518989381703583, "grad_norm": 1.8010733127593994, "learning_rate": 1.1282928905091616e-05, "loss": 1.268, "step": 23788 }, { "epoch": 0.8519347502999266, "grad_norm": 1.2809944152832031, "learning_rate": 1.1277577237575377e-05, "loss": 1.6804, "step": 23789 }, { "epoch": 0.8519705624294949, "grad_norm": 1.514971375465393, "learning_rate": 1.1272226763704863e-05, "loss": 1.7797, "step": 23790 }, { "epoch": 0.8520063745590631, "grad_norm": 2.1083321571350098, "learning_rate": 1.1266877483552118e-05, "loss": 1.374, "step": 23791 }, { "epoch": 0.8520421866886314, "grad_norm": 1.2880560159683228, "learning_rate": 1.126152939718903e-05, "loss": 1.582, "step": 23792 }, { "epoch": 0.8520779988181997, "grad_norm": 1.4016788005828857, "learning_rate": 1.125618250468764e-05, "loss": 1.5074, "step": 23793 }, { "epoch": 0.8521138109477681, "grad_norm": 1.4981015920639038, "learning_rate": 1.1250836806119824e-05, "loss": 1.3985, "step": 23794 }, { "epoch": 0.8521496230773363, "grad_norm": 1.716374397277832, "learning_rate": 1.1245492301557547e-05, "loss": 1.3592, "step": 23795 }, { "epoch": 0.8521854352069046, "grad_norm": 1.9721206426620483, "learning_rate": 1.1240148991072662e-05, "loss": 1.3089, "step": 23796 }, { "epoch": 0.8522212473364729, "grad_norm": 1.718814492225647, "learning_rate": 1.123480687473708e-05, "loss": 1.5038, "step": 23797 }, { "epoch": 0.8522570594660411, "grad_norm": 2.706646203994751, "learning_rate": 1.1229465952622686e-05, "loss": 1.4359, "step": 23798 }, { "epoch": 0.8522928715956094, "grad_norm": 1.659623622894287, "learning_rate": 1.122412622480129e-05, "loss": 1.4396, "step": 23799 }, { "epoch": 0.8523286837251777, "grad_norm": 1.6759545803070068, "learning_rate": 1.1218787691344801e-05, "loss": 1.6162, "step": 23800 }, { "epoch": 0.852364495854746, "grad_norm": 2.4348177909851074, "learning_rate": 1.1213450352324983e-05, "loss": 1.8575, "step": 23801 }, { "epoch": 0.8524003079843143, "grad_norm": 1.6588647365570068, "learning_rate": 1.1208114207813691e-05, "loss": 1.6102, "step": 23802 }, { "epoch": 0.8524361201138826, "grad_norm": 1.6332166194915771, "learning_rate": 1.1202779257882645e-05, "loss": 1.357, "step": 23803 }, { "epoch": 0.8524719322434509, "grad_norm": 1.6586291790008545, "learning_rate": 1.1197445502603698e-05, "loss": 1.4706, "step": 23804 }, { "epoch": 0.8525077443730191, "grad_norm": 1.4110736846923828, "learning_rate": 1.1192112942048582e-05, "loss": 1.311, "step": 23805 }, { "epoch": 0.8525435565025874, "grad_norm": 1.5690315961837769, "learning_rate": 1.1186781576289007e-05, "loss": 1.3135, "step": 23806 }, { "epoch": 0.8525793686321557, "grad_norm": 1.8064452409744263, "learning_rate": 1.1181451405396725e-05, "loss": 1.6223, "step": 23807 }, { "epoch": 0.852615180761724, "grad_norm": 1.4428201913833618, "learning_rate": 1.1176122429443458e-05, "loss": 1.4009, "step": 23808 }, { "epoch": 0.8526509928912923, "grad_norm": 1.7909198999404907, "learning_rate": 1.1170794648500893e-05, "loss": 1.3315, "step": 23809 }, { "epoch": 0.8526868050208606, "grad_norm": 1.5551303625106812, "learning_rate": 1.116546806264067e-05, "loss": 1.4318, "step": 23810 }, { "epoch": 0.8527226171504289, "grad_norm": 1.7789667844772339, "learning_rate": 1.1160142671934537e-05, "loss": 1.3732, "step": 23811 }, { "epoch": 0.8527584292799971, "grad_norm": 1.6274534463882446, "learning_rate": 1.1154818476454054e-05, "loss": 1.4462, "step": 23812 }, { "epoch": 0.8527942414095654, "grad_norm": 1.3836264610290527, "learning_rate": 1.114949547627091e-05, "loss": 1.5112, "step": 23813 }, { "epoch": 0.8528300535391337, "grad_norm": 1.9895684719085693, "learning_rate": 1.1144173671456682e-05, "loss": 1.3913, "step": 23814 }, { "epoch": 0.852865865668702, "grad_norm": 2.5720231533050537, "learning_rate": 1.1138853062082977e-05, "loss": 1.4404, "step": 23815 }, { "epoch": 0.8529016777982703, "grad_norm": 1.6841319799423218, "learning_rate": 1.1133533648221405e-05, "loss": 1.2546, "step": 23816 }, { "epoch": 0.8529374899278386, "grad_norm": 1.8937181234359741, "learning_rate": 1.1128215429943477e-05, "loss": 1.3837, "step": 23817 }, { "epoch": 0.8529733020574068, "grad_norm": 1.6849440336227417, "learning_rate": 1.1122898407320791e-05, "loss": 1.3869, "step": 23818 }, { "epoch": 0.8530091141869751, "grad_norm": 1.8310136795043945, "learning_rate": 1.1117582580424857e-05, "loss": 1.2157, "step": 23819 }, { "epoch": 0.8530449263165434, "grad_norm": 1.718644380569458, "learning_rate": 1.1112267949327216e-05, "loss": 1.3847, "step": 23820 }, { "epoch": 0.8530807384461117, "grad_norm": 1.7329232692718506, "learning_rate": 1.1106954514099332e-05, "loss": 1.3567, "step": 23821 }, { "epoch": 0.85311655057568, "grad_norm": 1.7883803844451904, "learning_rate": 1.1101642274812706e-05, "loss": 1.5081, "step": 23822 }, { "epoch": 0.8531523627052483, "grad_norm": 1.6889233589172363, "learning_rate": 1.1096331231538847e-05, "loss": 1.2767, "step": 23823 }, { "epoch": 0.8531881748348166, "grad_norm": 1.7603107690811157, "learning_rate": 1.1091021384349143e-05, "loss": 1.3097, "step": 23824 }, { "epoch": 0.8532239869643848, "grad_norm": 1.8802781105041504, "learning_rate": 1.1085712733315068e-05, "loss": 1.3812, "step": 23825 }, { "epoch": 0.8532597990939531, "grad_norm": 1.5397545099258423, "learning_rate": 1.1080405278508033e-05, "loss": 1.4916, "step": 23826 }, { "epoch": 0.8532956112235214, "grad_norm": 1.7694755792617798, "learning_rate": 1.1075099019999468e-05, "loss": 1.6139, "step": 23827 }, { "epoch": 0.8533314233530896, "grad_norm": 1.587158441543579, "learning_rate": 1.106979395786072e-05, "loss": 1.7875, "step": 23828 }, { "epoch": 0.853367235482658, "grad_norm": 1.5730561017990112, "learning_rate": 1.1064490092163181e-05, "loss": 1.2947, "step": 23829 }, { "epoch": 0.8534030476122263, "grad_norm": 1.4817062616348267, "learning_rate": 1.1059187422978211e-05, "loss": 1.5571, "step": 23830 }, { "epoch": 0.8534388597417946, "grad_norm": 2.2228071689605713, "learning_rate": 1.1053885950377174e-05, "loss": 1.4434, "step": 23831 }, { "epoch": 0.8534746718713628, "grad_norm": 1.7988253831863403, "learning_rate": 1.1048585674431345e-05, "loss": 1.5946, "step": 23832 }, { "epoch": 0.8535104840009311, "grad_norm": 1.7007036209106445, "learning_rate": 1.1043286595212054e-05, "loss": 1.3761, "step": 23833 }, { "epoch": 0.8535462961304994, "grad_norm": 1.7106313705444336, "learning_rate": 1.1037988712790626e-05, "loss": 1.3455, "step": 23834 }, { "epoch": 0.8535821082600676, "grad_norm": 1.68916916847229, "learning_rate": 1.1032692027238279e-05, "loss": 1.4494, "step": 23835 }, { "epoch": 0.853617920389636, "grad_norm": 1.5535471439361572, "learning_rate": 1.10273965386263e-05, "loss": 1.4831, "step": 23836 }, { "epoch": 0.8536537325192043, "grad_norm": 1.5345951318740845, "learning_rate": 1.1022102247025934e-05, "loss": 1.6866, "step": 23837 }, { "epoch": 0.8536895446487726, "grad_norm": 1.2427003383636475, "learning_rate": 1.1016809152508434e-05, "loss": 1.3729, "step": 23838 }, { "epoch": 0.8537253567783408, "grad_norm": 1.175044298171997, "learning_rate": 1.1011517255144965e-05, "loss": 1.3661, "step": 23839 }, { "epoch": 0.8537611689079091, "grad_norm": 1.9435486793518066, "learning_rate": 1.1006226555006749e-05, "loss": 1.1561, "step": 23840 }, { "epoch": 0.8537969810374774, "grad_norm": 1.4957728385925293, "learning_rate": 1.1000937052164973e-05, "loss": 1.2308, "step": 23841 }, { "epoch": 0.8538327931670456, "grad_norm": 1.851515293121338, "learning_rate": 1.0995648746690768e-05, "loss": 1.1669, "step": 23842 }, { "epoch": 0.853868605296614, "grad_norm": 1.4531022310256958, "learning_rate": 1.0990361638655311e-05, "loss": 1.3593, "step": 23843 }, { "epoch": 0.8539044174261823, "grad_norm": 1.6683907508850098, "learning_rate": 1.0985075728129712e-05, "loss": 1.413, "step": 23844 }, { "epoch": 0.8539402295557506, "grad_norm": 2.5751333236694336, "learning_rate": 1.0979791015185125e-05, "loss": 1.5377, "step": 23845 }, { "epoch": 0.8539760416853188, "grad_norm": 1.8338749408721924, "learning_rate": 1.0974507499892605e-05, "loss": 1.6186, "step": 23846 }, { "epoch": 0.8540118538148871, "grad_norm": 1.6571879386901855, "learning_rate": 1.0969225182323239e-05, "loss": 1.3968, "step": 23847 }, { "epoch": 0.8540476659444554, "grad_norm": 1.5902742147445679, "learning_rate": 1.0963944062548125e-05, "loss": 1.466, "step": 23848 }, { "epoch": 0.8540834780740236, "grad_norm": 1.432607650756836, "learning_rate": 1.0958664140638297e-05, "loss": 1.4103, "step": 23849 }, { "epoch": 0.854119290203592, "grad_norm": 1.4626818895339966, "learning_rate": 1.0953385416664785e-05, "loss": 1.0073, "step": 23850 }, { "epoch": 0.8541551023331603, "grad_norm": 2.2217087745666504, "learning_rate": 1.09481078906986e-05, "loss": 1.4524, "step": 23851 }, { "epoch": 0.8541909144627285, "grad_norm": 1.7042936086654663, "learning_rate": 1.0942831562810774e-05, "loss": 1.5566, "step": 23852 }, { "epoch": 0.8542267265922968, "grad_norm": 1.6705306768417358, "learning_rate": 1.093755643307226e-05, "loss": 1.3809, "step": 23853 }, { "epoch": 0.8542625387218651, "grad_norm": 2.15169620513916, "learning_rate": 1.0932282501554037e-05, "loss": 1.3918, "step": 23854 }, { "epoch": 0.8542983508514334, "grad_norm": 1.519851565361023, "learning_rate": 1.0927009768327068e-05, "loss": 1.2011, "step": 23855 }, { "epoch": 0.8543341629810016, "grad_norm": 1.5760105848312378, "learning_rate": 1.0921738233462297e-05, "loss": 1.5071, "step": 23856 }, { "epoch": 0.85436997511057, "grad_norm": 1.225251317024231, "learning_rate": 1.0916467897030625e-05, "loss": 1.1271, "step": 23857 }, { "epoch": 0.8544057872401383, "grad_norm": 1.4910647869110107, "learning_rate": 1.091119875910297e-05, "loss": 1.2668, "step": 23858 }, { "epoch": 0.8544415993697065, "grad_norm": 1.352760910987854, "learning_rate": 1.0905930819750232e-05, "loss": 1.1062, "step": 23859 }, { "epoch": 0.8544774114992748, "grad_norm": 1.927417278289795, "learning_rate": 1.0900664079043255e-05, "loss": 1.2948, "step": 23860 }, { "epoch": 0.8545132236288431, "grad_norm": 2.803440570831299, "learning_rate": 1.0895398537052914e-05, "loss": 1.2944, "step": 23861 }, { "epoch": 0.8545490357584113, "grad_norm": 1.5908805131912231, "learning_rate": 1.0890134193850043e-05, "loss": 1.4876, "step": 23862 }, { "epoch": 0.8545848478879796, "grad_norm": 1.509763240814209, "learning_rate": 1.0884871049505507e-05, "loss": 1.4407, "step": 23863 }, { "epoch": 0.854620660017548, "grad_norm": 1.424763560295105, "learning_rate": 1.0879609104090049e-05, "loss": 1.6699, "step": 23864 }, { "epoch": 0.8546564721471163, "grad_norm": 1.295540452003479, "learning_rate": 1.0874348357674492e-05, "loss": 1.2205, "step": 23865 }, { "epoch": 0.8546922842766845, "grad_norm": 1.5044755935668945, "learning_rate": 1.0869088810329642e-05, "loss": 1.3277, "step": 23866 }, { "epoch": 0.8547280964062528, "grad_norm": 1.0691648721694946, "learning_rate": 1.0863830462126202e-05, "loss": 1.3522, "step": 23867 }, { "epoch": 0.8547639085358211, "grad_norm": 1.2039045095443726, "learning_rate": 1.085857331313498e-05, "loss": 1.4813, "step": 23868 }, { "epoch": 0.8547997206653893, "grad_norm": 1.421851396560669, "learning_rate": 1.0853317363426618e-05, "loss": 1.3372, "step": 23869 }, { "epoch": 0.8548355327949576, "grad_norm": 1.878260612487793, "learning_rate": 1.0848062613071918e-05, "loss": 1.6804, "step": 23870 }, { "epoch": 0.854871344924526, "grad_norm": 1.3401705026626587, "learning_rate": 1.0842809062141524e-05, "loss": 1.519, "step": 23871 }, { "epoch": 0.8549071570540943, "grad_norm": 1.784934163093567, "learning_rate": 1.083755671070613e-05, "loss": 1.5975, "step": 23872 }, { "epoch": 0.8549429691836625, "grad_norm": 1.439996361732483, "learning_rate": 1.0832305558836397e-05, "loss": 1.2901, "step": 23873 }, { "epoch": 0.8549787813132308, "grad_norm": 2.1497843265533447, "learning_rate": 1.0827055606602998e-05, "loss": 1.6519, "step": 23874 }, { "epoch": 0.8550145934427991, "grad_norm": 1.6868973970413208, "learning_rate": 1.0821806854076533e-05, "loss": 1.6856, "step": 23875 }, { "epoch": 0.8550504055723673, "grad_norm": 1.7532434463500977, "learning_rate": 1.0816559301327589e-05, "loss": 1.3696, "step": 23876 }, { "epoch": 0.8550862177019356, "grad_norm": 1.7454650402069092, "learning_rate": 1.0811312948426844e-05, "loss": 1.4143, "step": 23877 }, { "epoch": 0.855122029831504, "grad_norm": 1.5443825721740723, "learning_rate": 1.0806067795444818e-05, "loss": 1.7368, "step": 23878 }, { "epoch": 0.8551578419610723, "grad_norm": 1.5286147594451904, "learning_rate": 1.0800823842452113e-05, "loss": 1.492, "step": 23879 }, { "epoch": 0.8551936540906405, "grad_norm": 2.415445566177368, "learning_rate": 1.0795581089519236e-05, "loss": 1.6911, "step": 23880 }, { "epoch": 0.8552294662202088, "grad_norm": 2.523890733718872, "learning_rate": 1.0790339536716776e-05, "loss": 1.4355, "step": 23881 }, { "epoch": 0.8552652783497771, "grad_norm": 2.055680751800537, "learning_rate": 1.078509918411521e-05, "loss": 1.2303, "step": 23882 }, { "epoch": 0.8553010904793453, "grad_norm": 1.5757873058319092, "learning_rate": 1.0779860031785061e-05, "loss": 1.5363, "step": 23883 }, { "epoch": 0.8553369026089136, "grad_norm": 1.436819314956665, "learning_rate": 1.0774622079796826e-05, "loss": 1.586, "step": 23884 }, { "epoch": 0.855372714738482, "grad_norm": 2.188523769378662, "learning_rate": 1.0769385328220938e-05, "loss": 1.1043, "step": 23885 }, { "epoch": 0.8554085268680502, "grad_norm": 1.9349035024642944, "learning_rate": 1.0764149777127897e-05, "loss": 1.5145, "step": 23886 }, { "epoch": 0.8554443389976185, "grad_norm": 1.4277589321136475, "learning_rate": 1.0758915426588068e-05, "loss": 1.4414, "step": 23887 }, { "epoch": 0.8554801511271868, "grad_norm": 1.9857627153396606, "learning_rate": 1.0753682276671961e-05, "loss": 1.3229, "step": 23888 }, { "epoch": 0.855515963256755, "grad_norm": 1.2367472648620605, "learning_rate": 1.074845032744991e-05, "loss": 1.3326, "step": 23889 }, { "epoch": 0.8555517753863233, "grad_norm": 1.7371845245361328, "learning_rate": 1.0743219578992369e-05, "loss": 1.3939, "step": 23890 }, { "epoch": 0.8555875875158916, "grad_norm": 1.5299841165542603, "learning_rate": 1.0737990031369627e-05, "loss": 1.4069, "step": 23891 }, { "epoch": 0.85562339964546, "grad_norm": 3.103381395339966, "learning_rate": 1.0732761684652127e-05, "loss": 1.3743, "step": 23892 }, { "epoch": 0.8556592117750282, "grad_norm": 1.619565486907959, "learning_rate": 1.0727534538910177e-05, "loss": 1.7073, "step": 23893 }, { "epoch": 0.8556950239045965, "grad_norm": 1.3993152379989624, "learning_rate": 1.0722308594214081e-05, "loss": 1.3665, "step": 23894 }, { "epoch": 0.8557308360341648, "grad_norm": 1.5730746984481812, "learning_rate": 1.0717083850634158e-05, "loss": 1.4241, "step": 23895 }, { "epoch": 0.855766648163733, "grad_norm": 1.6564350128173828, "learning_rate": 1.0711860308240706e-05, "loss": 1.5138, "step": 23896 }, { "epoch": 0.8558024602933013, "grad_norm": 1.9914555549621582, "learning_rate": 1.0706637967104016e-05, "loss": 1.4409, "step": 23897 }, { "epoch": 0.8558382724228696, "grad_norm": 1.646231770515442, "learning_rate": 1.0701416827294297e-05, "loss": 1.4735, "step": 23898 }, { "epoch": 0.855874084552438, "grad_norm": 1.485743522644043, "learning_rate": 1.069619688888187e-05, "loss": 1.4125, "step": 23899 }, { "epoch": 0.8559098966820062, "grad_norm": 1.75136399269104, "learning_rate": 1.0690978151936892e-05, "loss": 1.4939, "step": 23900 }, { "epoch": 0.8559457088115745, "grad_norm": 1.9949253797531128, "learning_rate": 1.0685760616529628e-05, "loss": 1.5425, "step": 23901 }, { "epoch": 0.8559815209411428, "grad_norm": 1.7424404621124268, "learning_rate": 1.068054428273022e-05, "loss": 1.4624, "step": 23902 }, { "epoch": 0.856017333070711, "grad_norm": 1.6785800457000732, "learning_rate": 1.0675329150608892e-05, "loss": 1.4386, "step": 23903 }, { "epoch": 0.8560531452002793, "grad_norm": 1.4667755365371704, "learning_rate": 1.0670115220235799e-05, "loss": 1.3168, "step": 23904 }, { "epoch": 0.8560889573298476, "grad_norm": 1.4324119091033936, "learning_rate": 1.0664902491681051e-05, "loss": 1.4958, "step": 23905 }, { "epoch": 0.856124769459416, "grad_norm": 3.299861192703247, "learning_rate": 1.0659690965014813e-05, "loss": 1.3856, "step": 23906 }, { "epoch": 0.8561605815889842, "grad_norm": 1.4514801502227783, "learning_rate": 1.0654480640307195e-05, "loss": 1.5554, "step": 23907 }, { "epoch": 0.8561963937185525, "grad_norm": 1.6911697387695312, "learning_rate": 1.0649271517628313e-05, "loss": 1.5904, "step": 23908 }, { "epoch": 0.8562322058481208, "grad_norm": 2.0003654956817627, "learning_rate": 1.0644063597048182e-05, "loss": 1.2916, "step": 23909 }, { "epoch": 0.856268017977689, "grad_norm": 2.0778403282165527, "learning_rate": 1.063885687863696e-05, "loss": 1.2572, "step": 23910 }, { "epoch": 0.8563038301072573, "grad_norm": 1.5954288244247437, "learning_rate": 1.0633651362464647e-05, "loss": 1.4367, "step": 23911 }, { "epoch": 0.8563396422368256, "grad_norm": 1.7671774625778198, "learning_rate": 1.0628447048601265e-05, "loss": 1.7623, "step": 23912 }, { "epoch": 0.856375454366394, "grad_norm": 1.8786360025405884, "learning_rate": 1.0623243937116845e-05, "loss": 1.3735, "step": 23913 }, { "epoch": 0.8564112664959622, "grad_norm": 1.2363111972808838, "learning_rate": 1.06180420280814e-05, "loss": 1.4896, "step": 23914 }, { "epoch": 0.8564470786255305, "grad_norm": 1.6070228815078735, "learning_rate": 1.0612841321564915e-05, "loss": 1.3349, "step": 23915 }, { "epoch": 0.8564828907550988, "grad_norm": 1.6431260108947754, "learning_rate": 1.0607641817637326e-05, "loss": 1.4696, "step": 23916 }, { "epoch": 0.856518702884667, "grad_norm": 2.0966637134552, "learning_rate": 1.060244351636861e-05, "loss": 1.2884, "step": 23917 }, { "epoch": 0.8565545150142353, "grad_norm": 1.5325583219528198, "learning_rate": 1.0597246417828698e-05, "loss": 1.2232, "step": 23918 }, { "epoch": 0.8565903271438036, "grad_norm": 2.3397915363311768, "learning_rate": 1.0592050522087549e-05, "loss": 1.5092, "step": 23919 }, { "epoch": 0.856626139273372, "grad_norm": 1.8545010089874268, "learning_rate": 1.0586855829215003e-05, "loss": 1.2814, "step": 23920 }, { "epoch": 0.8566619514029402, "grad_norm": 1.6768451929092407, "learning_rate": 1.0581662339280973e-05, "loss": 1.5897, "step": 23921 }, { "epoch": 0.8566977635325085, "grad_norm": 1.569795846939087, "learning_rate": 1.0576470052355358e-05, "loss": 1.4008, "step": 23922 }, { "epoch": 0.8567335756620768, "grad_norm": 1.2324440479278564, "learning_rate": 1.057127896850797e-05, "loss": 1.1082, "step": 23923 }, { "epoch": 0.856769387791645, "grad_norm": 2.4131672382354736, "learning_rate": 1.0566089087808672e-05, "loss": 1.3353, "step": 23924 }, { "epoch": 0.8568051999212133, "grad_norm": 1.468252420425415, "learning_rate": 1.056090041032729e-05, "loss": 1.425, "step": 23925 }, { "epoch": 0.8568410120507816, "grad_norm": 2.575080156326294, "learning_rate": 1.0555712936133633e-05, "loss": 1.446, "step": 23926 }, { "epoch": 0.8568768241803499, "grad_norm": 1.8932536840438843, "learning_rate": 1.0550526665297466e-05, "loss": 1.3337, "step": 23927 }, { "epoch": 0.8569126363099182, "grad_norm": 2.1886892318725586, "learning_rate": 1.0545341597888581e-05, "loss": 1.3273, "step": 23928 }, { "epoch": 0.8569484484394865, "grad_norm": 1.8264682292938232, "learning_rate": 1.0540157733976763e-05, "loss": 1.5032, "step": 23929 }, { "epoch": 0.8569842605690547, "grad_norm": 1.7340106964111328, "learning_rate": 1.0534975073631703e-05, "loss": 1.3628, "step": 23930 }, { "epoch": 0.857020072698623, "grad_norm": 1.687674641609192, "learning_rate": 1.0529793616923157e-05, "loss": 1.4686, "step": 23931 }, { "epoch": 0.8570558848281913, "grad_norm": 1.507163643836975, "learning_rate": 1.052461336392082e-05, "loss": 0.9268, "step": 23932 }, { "epoch": 0.8570916969577596, "grad_norm": 1.2720131874084473, "learning_rate": 1.0519434314694422e-05, "loss": 1.4392, "step": 23933 }, { "epoch": 0.8571275090873279, "grad_norm": 1.435158133506775, "learning_rate": 1.0514256469313588e-05, "loss": 1.6138, "step": 23934 }, { "epoch": 0.8571633212168962, "grad_norm": 2.085623025894165, "learning_rate": 1.0509079827848012e-05, "loss": 1.5755, "step": 23935 }, { "epoch": 0.8571991333464645, "grad_norm": 1.562156081199646, "learning_rate": 1.0503904390367325e-05, "loss": 1.3898, "step": 23936 }, { "epoch": 0.8572349454760327, "grad_norm": 2.3077609539031982, "learning_rate": 1.0498730156941184e-05, "loss": 1.6702, "step": 23937 }, { "epoch": 0.857270757605601, "grad_norm": 1.676255702972412, "learning_rate": 1.0493557127639164e-05, "loss": 1.3625, "step": 23938 }, { "epoch": 0.8573065697351693, "grad_norm": 1.866253137588501, "learning_rate": 1.0488385302530878e-05, "loss": 1.3196, "step": 23939 }, { "epoch": 0.8573423818647375, "grad_norm": 1.4433153867721558, "learning_rate": 1.0483214681685927e-05, "loss": 1.6465, "step": 23940 }, { "epoch": 0.8573781939943059, "grad_norm": 2.484351873397827, "learning_rate": 1.047804526517383e-05, "loss": 1.3115, "step": 23941 }, { "epoch": 0.8574140061238742, "grad_norm": 1.8021259307861328, "learning_rate": 1.0472877053064156e-05, "loss": 1.4364, "step": 23942 }, { "epoch": 0.8574498182534425, "grad_norm": 1.6426368951797485, "learning_rate": 1.0467710045426449e-05, "loss": 1.6388, "step": 23943 }, { "epoch": 0.8574856303830107, "grad_norm": 1.6121195554733276, "learning_rate": 1.046254424233023e-05, "loss": 1.1081, "step": 23944 }, { "epoch": 0.857521442512579, "grad_norm": 1.808910846710205, "learning_rate": 1.0457379643844966e-05, "loss": 1.5212, "step": 23945 }, { "epoch": 0.8575572546421473, "grad_norm": 1.7684128284454346, "learning_rate": 1.0452216250040148e-05, "loss": 1.5465, "step": 23946 }, { "epoch": 0.8575930667717155, "grad_norm": 1.6211570501327515, "learning_rate": 1.0447054060985284e-05, "loss": 1.3641, "step": 23947 }, { "epoch": 0.8576288789012839, "grad_norm": 1.535387396812439, "learning_rate": 1.0441893076749765e-05, "loss": 1.6225, "step": 23948 }, { "epoch": 0.8576646910308522, "grad_norm": 1.648389220237732, "learning_rate": 1.0436733297403056e-05, "loss": 1.1475, "step": 23949 }, { "epoch": 0.8577005031604205, "grad_norm": 1.5785759687423706, "learning_rate": 1.043157472301457e-05, "loss": 1.3503, "step": 23950 }, { "epoch": 0.8577363152899887, "grad_norm": 1.7339171171188354, "learning_rate": 1.0426417353653739e-05, "loss": 1.1188, "step": 23951 }, { "epoch": 0.857772127419557, "grad_norm": 1.6951241493225098, "learning_rate": 1.0421261189389885e-05, "loss": 1.3101, "step": 23952 }, { "epoch": 0.8578079395491253, "grad_norm": 2.160072088241577, "learning_rate": 1.0416106230292432e-05, "loss": 1.5236, "step": 23953 }, { "epoch": 0.8578437516786935, "grad_norm": 1.4381632804870605, "learning_rate": 1.0410952476430703e-05, "loss": 1.3926, "step": 23954 }, { "epoch": 0.8578795638082619, "grad_norm": 2.0479938983917236, "learning_rate": 1.0405799927874072e-05, "loss": 1.4182, "step": 23955 }, { "epoch": 0.8579153759378302, "grad_norm": 1.3435159921646118, "learning_rate": 1.0400648584691808e-05, "loss": 1.4873, "step": 23956 }, { "epoch": 0.8579511880673985, "grad_norm": 1.5888739824295044, "learning_rate": 1.0395498446953245e-05, "loss": 1.4416, "step": 23957 }, { "epoch": 0.8579870001969667, "grad_norm": 1.7520204782485962, "learning_rate": 1.0390349514727694e-05, "loss": 1.2782, "step": 23958 }, { "epoch": 0.858022812326535, "grad_norm": 1.682873249053955, "learning_rate": 1.0385201788084375e-05, "loss": 1.6233, "step": 23959 }, { "epoch": 0.8580586244561033, "grad_norm": 1.4209063053131104, "learning_rate": 1.0380055267092581e-05, "loss": 1.7817, "step": 23960 }, { "epoch": 0.8580944365856715, "grad_norm": 1.583618402481079, "learning_rate": 1.0374909951821532e-05, "loss": 1.3119, "step": 23961 }, { "epoch": 0.8581302487152399, "grad_norm": 1.6202590465545654, "learning_rate": 1.0369765842340484e-05, "loss": 1.3546, "step": 23962 }, { "epoch": 0.8581660608448082, "grad_norm": 1.4713712930679321, "learning_rate": 1.0364622938718627e-05, "loss": 1.2915, "step": 23963 }, { "epoch": 0.8582018729743764, "grad_norm": 1.4419574737548828, "learning_rate": 1.0359481241025105e-05, "loss": 1.3699, "step": 23964 }, { "epoch": 0.8582376851039447, "grad_norm": 2.112922430038452, "learning_rate": 1.0354340749329172e-05, "loss": 1.1557, "step": 23965 }, { "epoch": 0.858273497233513, "grad_norm": 1.5085670948028564, "learning_rate": 1.0349201463699932e-05, "loss": 1.4269, "step": 23966 }, { "epoch": 0.8583093093630813, "grad_norm": 1.2277408838272095, "learning_rate": 1.0344063384206537e-05, "loss": 1.3207, "step": 23967 }, { "epoch": 0.8583451214926495, "grad_norm": 1.8692991733551025, "learning_rate": 1.0338926510918134e-05, "loss": 1.8965, "step": 23968 }, { "epoch": 0.8583809336222178, "grad_norm": 1.5993348360061646, "learning_rate": 1.0333790843903835e-05, "loss": 1.3095, "step": 23969 }, { "epoch": 0.8584167457517862, "grad_norm": 1.7335530519485474, "learning_rate": 1.0328656383232692e-05, "loss": 1.3135, "step": 23970 }, { "epoch": 0.8584525578813544, "grad_norm": 1.4201031923294067, "learning_rate": 1.0323523128973822e-05, "loss": 1.2858, "step": 23971 }, { "epoch": 0.8584883700109227, "grad_norm": 1.8706300258636475, "learning_rate": 1.0318391081196288e-05, "loss": 1.4368, "step": 23972 }, { "epoch": 0.858524182140491, "grad_norm": 1.5056337118148804, "learning_rate": 1.0313260239969102e-05, "loss": 1.4255, "step": 23973 }, { "epoch": 0.8585599942700592, "grad_norm": 1.665971040725708, "learning_rate": 1.0308130605361333e-05, "loss": 1.4536, "step": 23974 }, { "epoch": 0.8585958063996275, "grad_norm": 1.5636063814163208, "learning_rate": 1.0303002177441934e-05, "loss": 1.3331, "step": 23975 }, { "epoch": 0.8586316185291958, "grad_norm": 1.4892923831939697, "learning_rate": 1.0297874956279974e-05, "loss": 1.2562, "step": 23976 }, { "epoch": 0.8586674306587642, "grad_norm": 1.3059931993484497, "learning_rate": 1.0292748941944385e-05, "loss": 1.2593, "step": 23977 }, { "epoch": 0.8587032427883324, "grad_norm": 1.7474253177642822, "learning_rate": 1.0287624134504158e-05, "loss": 1.2735, "step": 23978 }, { "epoch": 0.8587390549179007, "grad_norm": 1.916402816772461, "learning_rate": 1.0282500534028195e-05, "loss": 1.5238, "step": 23979 }, { "epoch": 0.858774867047469, "grad_norm": 1.8782771825790405, "learning_rate": 1.0277378140585491e-05, "loss": 1.4562, "step": 23980 }, { "epoch": 0.8588106791770372, "grad_norm": 1.3739666938781738, "learning_rate": 1.0272256954244941e-05, "loss": 1.6322, "step": 23981 }, { "epoch": 0.8588464913066055, "grad_norm": 1.7305517196655273, "learning_rate": 1.0267136975075386e-05, "loss": 1.2822, "step": 23982 }, { "epoch": 0.8588823034361738, "grad_norm": 1.8392616510391235, "learning_rate": 1.0262018203145796e-05, "loss": 1.331, "step": 23983 }, { "epoch": 0.8589181155657422, "grad_norm": 1.3434126377105713, "learning_rate": 1.0256900638524979e-05, "loss": 1.5343, "step": 23984 }, { "epoch": 0.8589539276953104, "grad_norm": 1.373840093612671, "learning_rate": 1.0251784281281829e-05, "loss": 1.4114, "step": 23985 }, { "epoch": 0.8589897398248787, "grad_norm": 2.5821125507354736, "learning_rate": 1.0246669131485109e-05, "loss": 1.411, "step": 23986 }, { "epoch": 0.859025551954447, "grad_norm": 1.8943785429000854, "learning_rate": 1.0241555189203722e-05, "loss": 1.3037, "step": 23987 }, { "epoch": 0.8590613640840152, "grad_norm": 1.3766758441925049, "learning_rate": 1.0236442454506411e-05, "loss": 1.3691, "step": 23988 }, { "epoch": 0.8590971762135835, "grad_norm": 2.0536932945251465, "learning_rate": 1.0231330927462002e-05, "loss": 1.1827, "step": 23989 }, { "epoch": 0.8591329883431518, "grad_norm": 1.7797173261642456, "learning_rate": 1.0226220608139214e-05, "loss": 1.53, "step": 23990 }, { "epoch": 0.8591688004727202, "grad_norm": 1.6189501285552979, "learning_rate": 1.022111149660684e-05, "loss": 1.8422, "step": 23991 }, { "epoch": 0.8592046126022884, "grad_norm": 1.5615276098251343, "learning_rate": 1.021600359293361e-05, "loss": 1.3183, "step": 23992 }, { "epoch": 0.8592404247318567, "grad_norm": 1.7850444316864014, "learning_rate": 1.0210896897188216e-05, "loss": 1.5006, "step": 23993 }, { "epoch": 0.859276236861425, "grad_norm": 2.1221442222595215, "learning_rate": 1.0205791409439413e-05, "loss": 1.2876, "step": 23994 }, { "epoch": 0.8593120489909932, "grad_norm": 1.399099349975586, "learning_rate": 1.0200687129755837e-05, "loss": 1.0976, "step": 23995 }, { "epoch": 0.8593478611205615, "grad_norm": 1.5655336380004883, "learning_rate": 1.0195584058206209e-05, "loss": 1.1475, "step": 23996 }, { "epoch": 0.8593836732501298, "grad_norm": 1.4112638235092163, "learning_rate": 1.0190482194859119e-05, "loss": 1.2891, "step": 23997 }, { "epoch": 0.8594194853796981, "grad_norm": 1.8032960891723633, "learning_rate": 1.018538153978329e-05, "loss": 1.3916, "step": 23998 }, { "epoch": 0.8594552975092664, "grad_norm": 1.1614011526107788, "learning_rate": 1.0180282093047288e-05, "loss": 0.9882, "step": 23999 }, { "epoch": 0.8594911096388347, "grad_norm": 1.553151249885559, "learning_rate": 1.0175183854719716e-05, "loss": 1.6078, "step": 24000 }, { "epoch": 0.859526921768403, "grad_norm": 1.4639896154403687, "learning_rate": 1.0170086824869184e-05, "loss": 1.3421, "step": 24001 }, { "epoch": 0.8595627338979712, "grad_norm": 1.3828316926956177, "learning_rate": 1.0164991003564261e-05, "loss": 1.2658, "step": 24002 }, { "epoch": 0.8595985460275395, "grad_norm": 2.3339357376098633, "learning_rate": 1.0159896390873524e-05, "loss": 1.5719, "step": 24003 }, { "epoch": 0.8596343581571078, "grad_norm": 1.4203572273254395, "learning_rate": 1.0154802986865475e-05, "loss": 1.5003, "step": 24004 }, { "epoch": 0.8596701702866761, "grad_norm": 1.435115933418274, "learning_rate": 1.0149710791608657e-05, "loss": 1.3779, "step": 24005 }, { "epoch": 0.8597059824162444, "grad_norm": 1.3784613609313965, "learning_rate": 1.0144619805171584e-05, "loss": 1.4091, "step": 24006 }, { "epoch": 0.8597417945458127, "grad_norm": 1.42812979221344, "learning_rate": 1.0139530027622768e-05, "loss": 1.2791, "step": 24007 }, { "epoch": 0.859777606675381, "grad_norm": 1.809586763381958, "learning_rate": 1.0134441459030642e-05, "loss": 1.3902, "step": 24008 }, { "epoch": 0.8598134188049492, "grad_norm": 1.387447714805603, "learning_rate": 1.0129354099463683e-05, "loss": 1.4874, "step": 24009 }, { "epoch": 0.8598492309345175, "grad_norm": 1.933063268661499, "learning_rate": 1.0124267948990363e-05, "loss": 1.7334, "step": 24010 }, { "epoch": 0.8598850430640858, "grad_norm": 1.4610179662704468, "learning_rate": 1.0119183007679067e-05, "loss": 1.3521, "step": 24011 }, { "epoch": 0.8599208551936541, "grad_norm": 1.5905406475067139, "learning_rate": 1.0114099275598232e-05, "loss": 1.497, "step": 24012 }, { "epoch": 0.8599566673232224, "grad_norm": 1.7359148263931274, "learning_rate": 1.0109016752816247e-05, "loss": 1.3048, "step": 24013 }, { "epoch": 0.8599924794527907, "grad_norm": 2.0931310653686523, "learning_rate": 1.0103935439401502e-05, "loss": 1.3526, "step": 24014 }, { "epoch": 0.8600282915823589, "grad_norm": 1.846803903579712, "learning_rate": 1.0098855335422331e-05, "loss": 1.521, "step": 24015 }, { "epoch": 0.8600641037119272, "grad_norm": 1.602505087852478, "learning_rate": 1.00937764409471e-05, "loss": 1.6741, "step": 24016 }, { "epoch": 0.8600999158414955, "grad_norm": 1.551328420639038, "learning_rate": 1.0088698756044146e-05, "loss": 1.2546, "step": 24017 }, { "epoch": 0.8601357279710637, "grad_norm": 1.5050621032714844, "learning_rate": 1.0083622280781769e-05, "loss": 1.4149, "step": 24018 }, { "epoch": 0.8601715401006321, "grad_norm": 1.488797903060913, "learning_rate": 1.0078547015228257e-05, "loss": 1.5439, "step": 24019 }, { "epoch": 0.8602073522302004, "grad_norm": 1.7439039945602417, "learning_rate": 1.0073472959451913e-05, "loss": 1.1836, "step": 24020 }, { "epoch": 0.8602431643597687, "grad_norm": 1.8317373991012573, "learning_rate": 1.0068400113521014e-05, "loss": 1.4543, "step": 24021 }, { "epoch": 0.8602789764893369, "grad_norm": 1.8038636445999146, "learning_rate": 1.0063328477503764e-05, "loss": 1.5558, "step": 24022 }, { "epoch": 0.8603147886189052, "grad_norm": 2.062847137451172, "learning_rate": 1.0058258051468417e-05, "loss": 1.2856, "step": 24023 }, { "epoch": 0.8603506007484735, "grad_norm": 2.013709306716919, "learning_rate": 1.0053188835483197e-05, "loss": 1.5779, "step": 24024 }, { "epoch": 0.8603864128780417, "grad_norm": 2.6702752113342285, "learning_rate": 1.0048120829616314e-05, "loss": 1.3096, "step": 24025 }, { "epoch": 0.8604222250076101, "grad_norm": 1.617053508758545, "learning_rate": 1.0043054033935917e-05, "loss": 1.5229, "step": 24026 }, { "epoch": 0.8604580371371784, "grad_norm": 1.3419846296310425, "learning_rate": 1.0037988448510193e-05, "loss": 1.154, "step": 24027 }, { "epoch": 0.8604938492667467, "grad_norm": 1.271104097366333, "learning_rate": 1.0032924073407313e-05, "loss": 1.4895, "step": 24028 }, { "epoch": 0.8605296613963149, "grad_norm": 1.682997703552246, "learning_rate": 1.0027860908695363e-05, "loss": 1.4205, "step": 24029 }, { "epoch": 0.8605654735258832, "grad_norm": 1.687303066253662, "learning_rate": 1.0022798954442491e-05, "loss": 1.2232, "step": 24030 }, { "epoch": 0.8606012856554515, "grad_norm": 1.7541818618774414, "learning_rate": 1.001773821071681e-05, "loss": 1.2612, "step": 24031 }, { "epoch": 0.8606370977850197, "grad_norm": 2.01399302482605, "learning_rate": 1.0012678677586396e-05, "loss": 1.4981, "step": 24032 }, { "epoch": 0.8606729099145881, "grad_norm": 1.589170217514038, "learning_rate": 1.0007620355119307e-05, "loss": 1.3366, "step": 24033 }, { "epoch": 0.8607087220441564, "grad_norm": 1.6481248140335083, "learning_rate": 1.000256324338359e-05, "loss": 1.3699, "step": 24034 }, { "epoch": 0.8607445341737247, "grad_norm": 1.8044880628585815, "learning_rate": 9.997507342447333e-06, "loss": 1.5593, "step": 24035 }, { "epoch": 0.8607803463032929, "grad_norm": 1.9255050420761108, "learning_rate": 9.992452652378493e-06, "loss": 1.5335, "step": 24036 }, { "epoch": 0.8608161584328612, "grad_norm": 1.7744263410568237, "learning_rate": 9.987399173245093e-06, "loss": 1.1219, "step": 24037 }, { "epoch": 0.8608519705624295, "grad_norm": 1.6102805137634277, "learning_rate": 9.982346905115137e-06, "loss": 1.3634, "step": 24038 }, { "epoch": 0.8608877826919977, "grad_norm": 1.4857194423675537, "learning_rate": 9.977295848056612e-06, "loss": 1.3996, "step": 24039 }, { "epoch": 0.8609235948215661, "grad_norm": 1.8359849452972412, "learning_rate": 9.97224600213742e-06, "loss": 1.3318, "step": 24040 }, { "epoch": 0.8609594069511344, "grad_norm": 1.50896155834198, "learning_rate": 9.96719736742554e-06, "loss": 1.6464, "step": 24041 }, { "epoch": 0.8609952190807026, "grad_norm": 1.3213436603546143, "learning_rate": 9.962149943988885e-06, "loss": 1.2947, "step": 24042 }, { "epoch": 0.8610310312102709, "grad_norm": 1.540392279624939, "learning_rate": 9.957103731895379e-06, "loss": 1.5812, "step": 24043 }, { "epoch": 0.8610668433398392, "grad_norm": 1.953053593635559, "learning_rate": 9.952058731212877e-06, "loss": 1.8253, "step": 24044 }, { "epoch": 0.8611026554694075, "grad_norm": 1.9430025815963745, "learning_rate": 9.947014942009269e-06, "loss": 1.4107, "step": 24045 }, { "epoch": 0.8611384675989757, "grad_norm": 1.5683186054229736, "learning_rate": 9.941972364352436e-06, "loss": 1.6623, "step": 24046 }, { "epoch": 0.8611742797285441, "grad_norm": 1.603224754333496, "learning_rate": 9.936930998310179e-06, "loss": 1.4686, "step": 24047 }, { "epoch": 0.8612100918581124, "grad_norm": 1.5715124607086182, "learning_rate": 9.931890843950342e-06, "loss": 1.481, "step": 24048 }, { "epoch": 0.8612459039876806, "grad_norm": 1.7702678442001343, "learning_rate": 9.92685190134074e-06, "loss": 1.6707, "step": 24049 }, { "epoch": 0.8612817161172489, "grad_norm": 1.6073265075683594, "learning_rate": 9.921814170549171e-06, "loss": 1.2719, "step": 24050 }, { "epoch": 0.8613175282468172, "grad_norm": 2.0798747539520264, "learning_rate": 9.916777651643383e-06, "loss": 1.3234, "step": 24051 }, { "epoch": 0.8613533403763854, "grad_norm": 2.2637381553649902, "learning_rate": 9.911742344691156e-06, "loss": 1.5011, "step": 24052 }, { "epoch": 0.8613891525059537, "grad_norm": 1.9088646173477173, "learning_rate": 9.906708249760244e-06, "loss": 1.2568, "step": 24053 }, { "epoch": 0.8614249646355221, "grad_norm": 1.5362104177474976, "learning_rate": 9.901675366918339e-06, "loss": 1.2768, "step": 24054 }, { "epoch": 0.8614607767650904, "grad_norm": 1.4941067695617676, "learning_rate": 9.896643696233177e-06, "loss": 1.0898, "step": 24055 }, { "epoch": 0.8614965888946586, "grad_norm": 1.436546802520752, "learning_rate": 9.891613237772458e-06, "loss": 1.5783, "step": 24056 }, { "epoch": 0.8615324010242269, "grad_norm": 1.2099878787994385, "learning_rate": 9.88658399160386e-06, "loss": 1.3196, "step": 24057 }, { "epoch": 0.8615682131537952, "grad_norm": 1.3755364418029785, "learning_rate": 9.88155595779502e-06, "loss": 1.4722, "step": 24058 }, { "epoch": 0.8616040252833634, "grad_norm": 1.5416947603225708, "learning_rate": 9.876529136413593e-06, "loss": 1.2288, "step": 24059 }, { "epoch": 0.8616398374129317, "grad_norm": 1.4445111751556396, "learning_rate": 9.871503527527226e-06, "loss": 1.4655, "step": 24060 }, { "epoch": 0.8616756495425001, "grad_norm": 1.4712949991226196, "learning_rate": 9.866479131203544e-06, "loss": 1.3464, "step": 24061 }, { "epoch": 0.8617114616720684, "grad_norm": 1.454877495765686, "learning_rate": 9.861455947510112e-06, "loss": 1.4811, "step": 24062 }, { "epoch": 0.8617472738016366, "grad_norm": 1.4645787477493286, "learning_rate": 9.856433976514479e-06, "loss": 1.4101, "step": 24063 }, { "epoch": 0.8617830859312049, "grad_norm": 2.1881892681121826, "learning_rate": 9.8514132182843e-06, "loss": 1.2949, "step": 24064 }, { "epoch": 0.8618188980607732, "grad_norm": 1.827864170074463, "learning_rate": 9.846393672887044e-06, "loss": 1.496, "step": 24065 }, { "epoch": 0.8618547101903414, "grad_norm": 1.3500398397445679, "learning_rate": 9.841375340390268e-06, "loss": 1.4182, "step": 24066 }, { "epoch": 0.8618905223199097, "grad_norm": 1.408724069595337, "learning_rate": 9.836358220861508e-06, "loss": 1.3504, "step": 24067 }, { "epoch": 0.8619263344494781, "grad_norm": 1.4061379432678223, "learning_rate": 9.831342314368252e-06, "loss": 1.1648, "step": 24068 }, { "epoch": 0.8619621465790464, "grad_norm": 1.690593957901001, "learning_rate": 9.826327620977972e-06, "loss": 1.3875, "step": 24069 }, { "epoch": 0.8619979587086146, "grad_norm": 1.5156593322753906, "learning_rate": 9.82131414075811e-06, "loss": 1.1197, "step": 24070 }, { "epoch": 0.8620337708381829, "grad_norm": 1.518989086151123, "learning_rate": 9.816301873776178e-06, "loss": 1.2619, "step": 24071 }, { "epoch": 0.8620695829677512, "grad_norm": 1.8217155933380127, "learning_rate": 9.81129082009955e-06, "loss": 1.471, "step": 24072 }, { "epoch": 0.8621053950973194, "grad_norm": 1.4540605545043945, "learning_rate": 9.8062809797957e-06, "loss": 1.2745, "step": 24073 }, { "epoch": 0.8621412072268877, "grad_norm": 1.4706782102584839, "learning_rate": 9.801272352931957e-06, "loss": 1.4899, "step": 24074 }, { "epoch": 0.8621770193564561, "grad_norm": 1.7271860837936401, "learning_rate": 9.796264939575784e-06, "loss": 1.5868, "step": 24075 }, { "epoch": 0.8622128314860243, "grad_norm": 1.2994468212127686, "learning_rate": 9.791258739794484e-06, "loss": 1.5339, "step": 24076 }, { "epoch": 0.8622486436155926, "grad_norm": 1.6923842430114746, "learning_rate": 9.78625375365545e-06, "loss": 1.3637, "step": 24077 }, { "epoch": 0.8622844557451609, "grad_norm": 1.8347572088241577, "learning_rate": 9.781249981226015e-06, "loss": 1.6562, "step": 24078 }, { "epoch": 0.8623202678747292, "grad_norm": 2.0344903469085693, "learning_rate": 9.77624742257347e-06, "loss": 1.68, "step": 24079 }, { "epoch": 0.8623560800042974, "grad_norm": 1.6433473825454712, "learning_rate": 9.771246077765151e-06, "loss": 1.554, "step": 24080 }, { "epoch": 0.8623918921338657, "grad_norm": 1.4841326475143433, "learning_rate": 9.766245946868302e-06, "loss": 1.5479, "step": 24081 }, { "epoch": 0.8624277042634341, "grad_norm": 1.3508626222610474, "learning_rate": 9.761247029950249e-06, "loss": 1.2383, "step": 24082 }, { "epoch": 0.8624635163930023, "grad_norm": 1.4519118070602417, "learning_rate": 9.756249327078204e-06, "loss": 1.3748, "step": 24083 }, { "epoch": 0.8624993285225706, "grad_norm": 2.055274486541748, "learning_rate": 9.751252838319436e-06, "loss": 1.6204, "step": 24084 }, { "epoch": 0.8625351406521389, "grad_norm": 1.8891230821609497, "learning_rate": 9.746257563741102e-06, "loss": 1.2223, "step": 24085 }, { "epoch": 0.8625709527817071, "grad_norm": 1.5189100503921509, "learning_rate": 9.741263503410503e-06, "loss": 1.4791, "step": 24086 }, { "epoch": 0.8626067649112754, "grad_norm": 1.586703896522522, "learning_rate": 9.736270657394774e-06, "loss": 1.1318, "step": 24087 }, { "epoch": 0.8626425770408437, "grad_norm": 1.821293830871582, "learning_rate": 9.731279025761076e-06, "loss": 1.2319, "step": 24088 }, { "epoch": 0.8626783891704121, "grad_norm": 1.7827171087265015, "learning_rate": 9.726288608576573e-06, "loss": 1.3376, "step": 24089 }, { "epoch": 0.8627142012999803, "grad_norm": 1.368043303489685, "learning_rate": 9.721299405908412e-06, "loss": 1.5547, "step": 24090 }, { "epoch": 0.8627500134295486, "grad_norm": 2.012338638305664, "learning_rate": 9.716311417823742e-06, "loss": 1.7127, "step": 24091 }, { "epoch": 0.8627858255591169, "grad_norm": 2.133498191833496, "learning_rate": 9.711324644389609e-06, "loss": 1.4458, "step": 24092 }, { "epoch": 0.8628216376886851, "grad_norm": 1.5990008115768433, "learning_rate": 9.706339085673167e-06, "loss": 1.2618, "step": 24093 }, { "epoch": 0.8628574498182534, "grad_norm": 1.9938685894012451, "learning_rate": 9.701354741741454e-06, "loss": 1.6879, "step": 24094 }, { "epoch": 0.8628932619478217, "grad_norm": 1.8313326835632324, "learning_rate": 9.696371612661548e-06, "loss": 1.3386, "step": 24095 }, { "epoch": 0.8629290740773901, "grad_norm": 1.820915699005127, "learning_rate": 9.691389698500463e-06, "loss": 1.2029, "step": 24096 }, { "epoch": 0.8629648862069583, "grad_norm": 1.3561536073684692, "learning_rate": 9.686408999325236e-06, "loss": 1.3453, "step": 24097 }, { "epoch": 0.8630006983365266, "grad_norm": 2.9096224308013916, "learning_rate": 9.6814295152029e-06, "loss": 1.2922, "step": 24098 }, { "epoch": 0.8630365104660949, "grad_norm": 1.629462718963623, "learning_rate": 9.676451246200401e-06, "loss": 1.4125, "step": 24099 }, { "epoch": 0.8630723225956631, "grad_norm": 1.6376150846481323, "learning_rate": 9.671474192384755e-06, "loss": 1.2534, "step": 24100 }, { "epoch": 0.8631081347252314, "grad_norm": 1.6502249240875244, "learning_rate": 9.666498353822905e-06, "loss": 1.478, "step": 24101 }, { "epoch": 0.8631439468547997, "grad_norm": 2.37195086479187, "learning_rate": 9.661523730581813e-06, "loss": 1.2485, "step": 24102 }, { "epoch": 0.863179758984368, "grad_norm": 1.9842101335525513, "learning_rate": 9.656550322728353e-06, "loss": 1.3154, "step": 24103 }, { "epoch": 0.8632155711139363, "grad_norm": 1.7502880096435547, "learning_rate": 9.651578130329508e-06, "loss": 1.5923, "step": 24104 }, { "epoch": 0.8632513832435046, "grad_norm": 1.3002649545669556, "learning_rate": 9.646607153452147e-06, "loss": 1.1142, "step": 24105 }, { "epoch": 0.8632871953730729, "grad_norm": 1.5864512920379639, "learning_rate": 9.641637392163116e-06, "loss": 1.1018, "step": 24106 }, { "epoch": 0.8633230075026411, "grad_norm": 1.585464596748352, "learning_rate": 9.636668846529296e-06, "loss": 1.3463, "step": 24107 }, { "epoch": 0.8633588196322094, "grad_norm": 1.4023091793060303, "learning_rate": 9.631701516617542e-06, "loss": 1.2446, "step": 24108 }, { "epoch": 0.8633946317617777, "grad_norm": 1.7487534284591675, "learning_rate": 9.626735402494703e-06, "loss": 1.1232, "step": 24109 }, { "epoch": 0.863430443891346, "grad_norm": 2.0945987701416016, "learning_rate": 9.621770504227534e-06, "loss": 1.4021, "step": 24110 }, { "epoch": 0.8634662560209143, "grad_norm": 1.8034226894378662, "learning_rate": 9.616806821882873e-06, "loss": 1.2245, "step": 24111 }, { "epoch": 0.8635020681504826, "grad_norm": 1.5060043334960938, "learning_rate": 9.611844355527477e-06, "loss": 1.4211, "step": 24112 }, { "epoch": 0.8635378802800509, "grad_norm": 1.9010181427001953, "learning_rate": 9.60688310522816e-06, "loss": 1.5169, "step": 24113 }, { "epoch": 0.8635736924096191, "grad_norm": 2.2144103050231934, "learning_rate": 9.6019230710516e-06, "loss": 1.7569, "step": 24114 }, { "epoch": 0.8636095045391874, "grad_norm": 1.660400390625, "learning_rate": 9.596964253064567e-06, "loss": 1.3721, "step": 24115 }, { "epoch": 0.8636453166687557, "grad_norm": 1.677625298500061, "learning_rate": 9.592006651333785e-06, "loss": 1.4996, "step": 24116 }, { "epoch": 0.863681128798324, "grad_norm": 1.848111867904663, "learning_rate": 9.587050265925912e-06, "loss": 1.7495, "step": 24117 }, { "epoch": 0.8637169409278923, "grad_norm": 1.6364637613296509, "learning_rate": 9.582095096907651e-06, "loss": 1.3123, "step": 24118 }, { "epoch": 0.8637527530574606, "grad_norm": 1.3802284002304077, "learning_rate": 9.57714114434568e-06, "loss": 1.4342, "step": 24119 }, { "epoch": 0.8637885651870288, "grad_norm": 1.6368407011032104, "learning_rate": 9.572188408306649e-06, "loss": 1.496, "step": 24120 }, { "epoch": 0.8638243773165971, "grad_norm": 1.535515546798706, "learning_rate": 9.567236888857166e-06, "loss": 1.4658, "step": 24121 }, { "epoch": 0.8638601894461654, "grad_norm": 1.625495195388794, "learning_rate": 9.562286586063861e-06, "loss": 1.8956, "step": 24122 }, { "epoch": 0.8638960015757337, "grad_norm": 1.8822096586227417, "learning_rate": 9.557337499993346e-06, "loss": 1.4238, "step": 24123 }, { "epoch": 0.863931813705302, "grad_norm": 2.1007256507873535, "learning_rate": 9.552389630712178e-06, "loss": 1.5239, "step": 24124 }, { "epoch": 0.8639676258348703, "grad_norm": 2.123950958251953, "learning_rate": 9.547442978286946e-06, "loss": 1.5102, "step": 24125 }, { "epoch": 0.8640034379644386, "grad_norm": 1.6733735799789429, "learning_rate": 9.542497542784178e-06, "loss": 1.2072, "step": 24126 }, { "epoch": 0.8640392500940068, "grad_norm": 1.3590643405914307, "learning_rate": 9.537553324270455e-06, "loss": 1.7378, "step": 24127 }, { "epoch": 0.8640750622235751, "grad_norm": 1.9480410814285278, "learning_rate": 9.53261032281224e-06, "loss": 1.5894, "step": 24128 }, { "epoch": 0.8641108743531434, "grad_norm": 1.5625706911087036, "learning_rate": 9.527668538476054e-06, "loss": 1.3688, "step": 24129 }, { "epoch": 0.8641466864827116, "grad_norm": 1.3763489723205566, "learning_rate": 9.522727971328393e-06, "loss": 0.9818, "step": 24130 }, { "epoch": 0.86418249861228, "grad_norm": 2.1239705085754395, "learning_rate": 9.517788621435742e-06, "loss": 1.7518, "step": 24131 }, { "epoch": 0.8642183107418483, "grad_norm": 2.0525310039520264, "learning_rate": 9.512850488864511e-06, "loss": 1.6703, "step": 24132 }, { "epoch": 0.8642541228714166, "grad_norm": 2.882822036743164, "learning_rate": 9.50791357368115e-06, "loss": 1.3199, "step": 24133 }, { "epoch": 0.8642899350009848, "grad_norm": 1.5467312335968018, "learning_rate": 9.502977875952113e-06, "loss": 1.4511, "step": 24134 }, { "epoch": 0.8643257471305531, "grad_norm": 1.8754770755767822, "learning_rate": 9.49804339574375e-06, "loss": 1.5401, "step": 24135 }, { "epoch": 0.8643615592601214, "grad_norm": 1.4859979152679443, "learning_rate": 9.493110133122474e-06, "loss": 1.5788, "step": 24136 }, { "epoch": 0.8643973713896896, "grad_norm": 1.6180216073989868, "learning_rate": 9.488178088154654e-06, "loss": 1.0272, "step": 24137 }, { "epoch": 0.864433183519258, "grad_norm": 1.5847938060760498, "learning_rate": 9.48324726090667e-06, "loss": 1.4547, "step": 24138 }, { "epoch": 0.8644689956488263, "grad_norm": 1.4334745407104492, "learning_rate": 9.478317651444812e-06, "loss": 1.2533, "step": 24139 }, { "epoch": 0.8645048077783946, "grad_norm": 1.6126441955566406, "learning_rate": 9.47338925983543e-06, "loss": 1.2693, "step": 24140 }, { "epoch": 0.8645406199079628, "grad_norm": 1.5606629848480225, "learning_rate": 9.468462086144847e-06, "loss": 1.3871, "step": 24141 }, { "epoch": 0.8645764320375311, "grad_norm": 1.4781626462936401, "learning_rate": 9.46353613043931e-06, "loss": 1.239, "step": 24142 }, { "epoch": 0.8646122441670994, "grad_norm": 1.2007153034210205, "learning_rate": 9.4586113927851e-06, "loss": 1.2876, "step": 24143 }, { "epoch": 0.8646480562966676, "grad_norm": 1.7525100708007812, "learning_rate": 9.453687873248495e-06, "loss": 1.5107, "step": 24144 }, { "epoch": 0.864683868426236, "grad_norm": 1.4336668252944946, "learning_rate": 9.448765571895735e-06, "loss": 1.4305, "step": 24145 }, { "epoch": 0.8647196805558043, "grad_norm": 2.0794224739074707, "learning_rate": 9.443844488793018e-06, "loss": 1.7878, "step": 24146 }, { "epoch": 0.8647554926853726, "grad_norm": 1.4945629835128784, "learning_rate": 9.438924624006563e-06, "loss": 1.5938, "step": 24147 }, { "epoch": 0.8647913048149408, "grad_norm": 1.2787604331970215, "learning_rate": 9.434005977602556e-06, "loss": 1.2151, "step": 24148 }, { "epoch": 0.8648271169445091, "grad_norm": 1.4332270622253418, "learning_rate": 9.429088549647203e-06, "loss": 1.2921, "step": 24149 }, { "epoch": 0.8648629290740774, "grad_norm": 1.8126541376113892, "learning_rate": 9.424172340206616e-06, "loss": 1.3874, "step": 24150 }, { "epoch": 0.8648987412036456, "grad_norm": 1.3045158386230469, "learning_rate": 9.419257349346956e-06, "loss": 1.3823, "step": 24151 }, { "epoch": 0.864934553333214, "grad_norm": 1.5711522102355957, "learning_rate": 9.414343577134355e-06, "loss": 1.5328, "step": 24152 }, { "epoch": 0.8649703654627823, "grad_norm": 1.600151777267456, "learning_rate": 9.409431023634908e-06, "loss": 1.39, "step": 24153 }, { "epoch": 0.8650061775923505, "grad_norm": 1.6828088760375977, "learning_rate": 9.404519688914703e-06, "loss": 1.2594, "step": 24154 }, { "epoch": 0.8650419897219188, "grad_norm": 1.626774787902832, "learning_rate": 9.399609573039836e-06, "loss": 1.4374, "step": 24155 }, { "epoch": 0.8650778018514871, "grad_norm": 1.709047555923462, "learning_rate": 9.394700676076374e-06, "loss": 1.2774, "step": 24156 }, { "epoch": 0.8651136139810554, "grad_norm": 1.3657279014587402, "learning_rate": 9.389792998090319e-06, "loss": 1.4303, "step": 24157 }, { "epoch": 0.8651494261106236, "grad_norm": 1.8122633695602417, "learning_rate": 9.384886539147718e-06, "loss": 1.4683, "step": 24158 }, { "epoch": 0.865185238240192, "grad_norm": 2.004755973815918, "learning_rate": 9.379981299314611e-06, "loss": 1.7386, "step": 24159 }, { "epoch": 0.8652210503697603, "grad_norm": 1.629216194152832, "learning_rate": 9.375077278656941e-06, "loss": 1.3271, "step": 24160 }, { "epoch": 0.8652568624993285, "grad_norm": 1.6570649147033691, "learning_rate": 9.370174477240712e-06, "loss": 1.3823, "step": 24161 }, { "epoch": 0.8652926746288968, "grad_norm": 1.4738001823425293, "learning_rate": 9.36527289513187e-06, "loss": 1.4896, "step": 24162 }, { "epoch": 0.8653284867584651, "grad_norm": 1.5269134044647217, "learning_rate": 9.3603725323964e-06, "loss": 1.3027, "step": 24163 }, { "epoch": 0.8653642988880333, "grad_norm": 2.1662659645080566, "learning_rate": 9.355473389100178e-06, "loss": 1.4489, "step": 24164 }, { "epoch": 0.8654001110176016, "grad_norm": 1.455833911895752, "learning_rate": 9.350575465309142e-06, "loss": 1.3106, "step": 24165 }, { "epoch": 0.86543592314717, "grad_norm": 1.9036498069763184, "learning_rate": 9.345678761089194e-06, "loss": 1.6722, "step": 24166 }, { "epoch": 0.8654717352767383, "grad_norm": 1.4704736471176147, "learning_rate": 9.340783276506193e-06, "loss": 1.2371, "step": 24167 }, { "epoch": 0.8655075474063065, "grad_norm": 1.6556400060653687, "learning_rate": 9.335889011626032e-06, "loss": 1.2193, "step": 24168 }, { "epoch": 0.8655433595358748, "grad_norm": 1.7995213270187378, "learning_rate": 9.330995966514489e-06, "loss": 1.4554, "step": 24169 }, { "epoch": 0.8655791716654431, "grad_norm": 1.5920368432998657, "learning_rate": 9.32610414123748e-06, "loss": 1.2939, "step": 24170 }, { "epoch": 0.8656149837950113, "grad_norm": 1.5632593631744385, "learning_rate": 9.321213535860763e-06, "loss": 1.2844, "step": 24171 }, { "epoch": 0.8656507959245796, "grad_norm": 1.6625022888183594, "learning_rate": 9.316324150450173e-06, "loss": 1.2916, "step": 24172 }, { "epoch": 0.865686608054148, "grad_norm": 1.8836668729782104, "learning_rate": 9.311435985071426e-06, "loss": 1.4325, "step": 24173 }, { "epoch": 0.8657224201837163, "grad_norm": 1.6699721813201904, "learning_rate": 9.30654903979037e-06, "loss": 1.4862, "step": 24174 }, { "epoch": 0.8657582323132845, "grad_norm": 1.752537488937378, "learning_rate": 9.301663314672704e-06, "loss": 1.4677, "step": 24175 }, { "epoch": 0.8657940444428528, "grad_norm": 2.023043155670166, "learning_rate": 9.296778809784123e-06, "loss": 1.2724, "step": 24176 }, { "epoch": 0.8658298565724211, "grad_norm": 1.663191556930542, "learning_rate": 9.29189552519043e-06, "loss": 1.4344, "step": 24177 }, { "epoch": 0.8658656687019893, "grad_norm": 1.4714888334274292, "learning_rate": 9.287013460957261e-06, "loss": 1.2581, "step": 24178 }, { "epoch": 0.8659014808315576, "grad_norm": 1.4269914627075195, "learning_rate": 9.28213261715033e-06, "loss": 1.4169, "step": 24179 }, { "epoch": 0.865937292961126, "grad_norm": 1.6043585538864136, "learning_rate": 9.27725299383525e-06, "loss": 1.5219, "step": 24180 }, { "epoch": 0.8659731050906943, "grad_norm": 1.789746642112732, "learning_rate": 9.272374591077748e-06, "loss": 1.2993, "step": 24181 }, { "epoch": 0.8660089172202625, "grad_norm": 1.4630588293075562, "learning_rate": 9.267497408943393e-06, "loss": 1.5637, "step": 24182 }, { "epoch": 0.8660447293498308, "grad_norm": 1.9434194564819336, "learning_rate": 9.262621447497844e-06, "loss": 1.3992, "step": 24183 }, { "epoch": 0.8660805414793991, "grad_norm": 1.6045405864715576, "learning_rate": 9.257746706806658e-06, "loss": 1.2954, "step": 24184 }, { "epoch": 0.8661163536089673, "grad_norm": 1.6443135738372803, "learning_rate": 9.252873186935452e-06, "loss": 1.2575, "step": 24185 }, { "epoch": 0.8661521657385356, "grad_norm": 2.0701043605804443, "learning_rate": 9.248000887949782e-06, "loss": 1.5517, "step": 24186 }, { "epoch": 0.866187977868104, "grad_norm": 1.616709589958191, "learning_rate": 9.243129809915175e-06, "loss": 1.4381, "step": 24187 }, { "epoch": 0.8662237899976722, "grad_norm": 1.3892414569854736, "learning_rate": 9.238259952897221e-06, "loss": 1.4791, "step": 24188 }, { "epoch": 0.8662596021272405, "grad_norm": 1.8243262767791748, "learning_rate": 9.233391316961393e-06, "loss": 1.6, "step": 24189 }, { "epoch": 0.8662954142568088, "grad_norm": 1.4055460691452026, "learning_rate": 9.228523902173214e-06, "loss": 1.2116, "step": 24190 }, { "epoch": 0.866331226386377, "grad_norm": 1.6112884283065796, "learning_rate": 9.223657708598133e-06, "loss": 1.3763, "step": 24191 }, { "epoch": 0.8663670385159453, "grad_norm": 1.9256858825683594, "learning_rate": 9.218792736301674e-06, "loss": 1.5623, "step": 24192 }, { "epoch": 0.8664028506455136, "grad_norm": 1.4280561208724976, "learning_rate": 9.213928985349252e-06, "loss": 1.3904, "step": 24193 }, { "epoch": 0.866438662775082, "grad_norm": 1.9041266441345215, "learning_rate": 9.209066455806303e-06, "loss": 1.5014, "step": 24194 }, { "epoch": 0.8664744749046502, "grad_norm": 1.7539561986923218, "learning_rate": 9.204205147738254e-06, "loss": 1.3906, "step": 24195 }, { "epoch": 0.8665102870342185, "grad_norm": 1.7698585987091064, "learning_rate": 9.199345061210495e-06, "loss": 1.4611, "step": 24196 }, { "epoch": 0.8665460991637868, "grad_norm": 1.589356541633606, "learning_rate": 9.194486196288454e-06, "loss": 1.4255, "step": 24197 }, { "epoch": 0.866581911293355, "grad_norm": 1.2249170541763306, "learning_rate": 9.189628553037445e-06, "loss": 1.4406, "step": 24198 }, { "epoch": 0.8666177234229233, "grad_norm": 1.425215482711792, "learning_rate": 9.184772131522845e-06, "loss": 1.3896, "step": 24199 }, { "epoch": 0.8666535355524916, "grad_norm": 1.1758129596710205, "learning_rate": 9.179916931809995e-06, "loss": 1.323, "step": 24200 }, { "epoch": 0.86668934768206, "grad_norm": 1.505043387413025, "learning_rate": 9.175062953964242e-06, "loss": 1.5766, "step": 24201 }, { "epoch": 0.8667251598116282, "grad_norm": 1.791693091392517, "learning_rate": 9.170210198050833e-06, "loss": 1.1097, "step": 24202 }, { "epoch": 0.8667609719411965, "grad_norm": 1.9780690670013428, "learning_rate": 9.165358664135082e-06, "loss": 1.4566, "step": 24203 }, { "epoch": 0.8667967840707648, "grad_norm": 1.7749983072280884, "learning_rate": 9.160508352282282e-06, "loss": 1.4966, "step": 24204 }, { "epoch": 0.866832596200333, "grad_norm": 2.2015771865844727, "learning_rate": 9.155659262557648e-06, "loss": 1.4599, "step": 24205 }, { "epoch": 0.8668684083299013, "grad_norm": 1.5244667530059814, "learning_rate": 9.150811395026448e-06, "loss": 1.4312, "step": 24206 }, { "epoch": 0.8669042204594696, "grad_norm": 2.0955560207366943, "learning_rate": 9.145964749753888e-06, "loss": 1.7519, "step": 24207 }, { "epoch": 0.866940032589038, "grad_norm": 1.8348698616027832, "learning_rate": 9.141119326805193e-06, "loss": 1.5078, "step": 24208 }, { "epoch": 0.8669758447186062, "grad_norm": 1.442132830619812, "learning_rate": 9.13627512624552e-06, "loss": 1.3298, "step": 24209 }, { "epoch": 0.8670116568481745, "grad_norm": 1.7003968954086304, "learning_rate": 9.131432148140062e-06, "loss": 1.665, "step": 24210 }, { "epoch": 0.8670474689777428, "grad_norm": 2.5281410217285156, "learning_rate": 9.126590392553992e-06, "loss": 1.4594, "step": 24211 }, { "epoch": 0.867083281107311, "grad_norm": 2.014244556427002, "learning_rate": 9.12174985955241e-06, "loss": 1.2994, "step": 24212 }, { "epoch": 0.8671190932368793, "grad_norm": 1.3764430284500122, "learning_rate": 9.116910549200452e-06, "loss": 1.1514, "step": 24213 }, { "epoch": 0.8671549053664476, "grad_norm": 2.000931978225708, "learning_rate": 9.112072461563248e-06, "loss": 1.557, "step": 24214 }, { "epoch": 0.867190717496016, "grad_norm": 1.996650218963623, "learning_rate": 9.107235596705877e-06, "loss": 1.3858, "step": 24215 }, { "epoch": 0.8672265296255842, "grad_norm": 1.7024036645889282, "learning_rate": 9.102399954693396e-06, "loss": 1.5788, "step": 24216 }, { "epoch": 0.8672623417551525, "grad_norm": 1.849635362625122, "learning_rate": 9.097565535590869e-06, "loss": 1.4354, "step": 24217 }, { "epoch": 0.8672981538847208, "grad_norm": 1.4759061336517334, "learning_rate": 9.092732339463339e-06, "loss": 1.2717, "step": 24218 }, { "epoch": 0.867333966014289, "grad_norm": 1.4988124370574951, "learning_rate": 9.087900366375868e-06, "loss": 1.5389, "step": 24219 }, { "epoch": 0.8673697781438573, "grad_norm": 1.6259818077087402, "learning_rate": 9.083069616393392e-06, "loss": 1.3329, "step": 24220 }, { "epoch": 0.8674055902734256, "grad_norm": 2.060462713241577, "learning_rate": 9.078240089580948e-06, "loss": 1.7412, "step": 24221 }, { "epoch": 0.8674414024029939, "grad_norm": 1.432573676109314, "learning_rate": 9.073411786003527e-06, "loss": 1.658, "step": 24222 }, { "epoch": 0.8674772145325622, "grad_norm": 1.677588939666748, "learning_rate": 9.068584705726035e-06, "loss": 1.2352, "step": 24223 }, { "epoch": 0.8675130266621305, "grad_norm": 2.123272180557251, "learning_rate": 9.063758848813452e-06, "loss": 1.4148, "step": 24224 }, { "epoch": 0.8675488387916988, "grad_norm": 1.933234453201294, "learning_rate": 9.058934215330695e-06, "loss": 1.3967, "step": 24225 }, { "epoch": 0.867584650921267, "grad_norm": 1.572487711906433, "learning_rate": 9.054110805342686e-06, "loss": 1.4756, "step": 24226 }, { "epoch": 0.8676204630508353, "grad_norm": 2.7359111309051514, "learning_rate": 9.049288618914276e-06, "loss": 1.6118, "step": 24227 }, { "epoch": 0.8676562751804036, "grad_norm": 1.771386981010437, "learning_rate": 9.044467656110389e-06, "loss": 1.3877, "step": 24228 }, { "epoch": 0.8676920873099719, "grad_norm": 1.7388691902160645, "learning_rate": 9.039647916995874e-06, "loss": 1.2226, "step": 24229 }, { "epoch": 0.8677278994395402, "grad_norm": 2.149545192718506, "learning_rate": 9.034829401635547e-06, "loss": 1.3794, "step": 24230 }, { "epoch": 0.8677637115691085, "grad_norm": 1.9506735801696777, "learning_rate": 9.030012110094255e-06, "loss": 1.5796, "step": 24231 }, { "epoch": 0.8677995236986767, "grad_norm": 1.4188599586486816, "learning_rate": 9.025196042436802e-06, "loss": 1.5882, "step": 24232 }, { "epoch": 0.867835335828245, "grad_norm": 1.2901597023010254, "learning_rate": 9.020381198728011e-06, "loss": 1.6489, "step": 24233 }, { "epoch": 0.8678711479578133, "grad_norm": 1.7088090181350708, "learning_rate": 9.015567579032614e-06, "loss": 1.5318, "step": 24234 }, { "epoch": 0.8679069600873816, "grad_norm": 1.7100541591644287, "learning_rate": 9.010755183415398e-06, "loss": 1.4519, "step": 24235 }, { "epoch": 0.8679427722169499, "grad_norm": 1.6215293407440186, "learning_rate": 9.005944011941103e-06, "loss": 1.4229, "step": 24236 }, { "epoch": 0.8679785843465182, "grad_norm": 1.7219916582107544, "learning_rate": 9.001134064674476e-06, "loss": 1.4134, "step": 24237 }, { "epoch": 0.8680143964760865, "grad_norm": 1.3429756164550781, "learning_rate": 8.99632534168019e-06, "loss": 1.6244, "step": 24238 }, { "epoch": 0.8680502086056547, "grad_norm": 1.7451717853546143, "learning_rate": 8.991517843022968e-06, "loss": 1.5262, "step": 24239 }, { "epoch": 0.868086020735223, "grad_norm": 1.512374758720398, "learning_rate": 8.986711568767493e-06, "loss": 1.2379, "step": 24240 }, { "epoch": 0.8681218328647913, "grad_norm": 2.092271089553833, "learning_rate": 8.981906518978389e-06, "loss": 1.6425, "step": 24241 }, { "epoch": 0.8681576449943595, "grad_norm": 1.376625657081604, "learning_rate": 8.977102693720341e-06, "loss": 1.5389, "step": 24242 }, { "epoch": 0.8681934571239279, "grad_norm": 1.661430835723877, "learning_rate": 8.97230009305795e-06, "loss": 1.507, "step": 24243 }, { "epoch": 0.8682292692534962, "grad_norm": 2.269094944000244, "learning_rate": 8.967498717055878e-06, "loss": 1.596, "step": 24244 }, { "epoch": 0.8682650813830645, "grad_norm": 1.464607834815979, "learning_rate": 8.96269856577866e-06, "loss": 1.4243, "step": 24245 }, { "epoch": 0.8683008935126327, "grad_norm": 1.3154370784759521, "learning_rate": 8.9578996392909e-06, "loss": 1.2494, "step": 24246 }, { "epoch": 0.868336705642201, "grad_norm": 1.6771907806396484, "learning_rate": 8.953101937657194e-06, "loss": 1.4635, "step": 24247 }, { "epoch": 0.8683725177717693, "grad_norm": 1.5361515283584595, "learning_rate": 8.94830546094203e-06, "loss": 1.6276, "step": 24248 }, { "epoch": 0.8684083299013375, "grad_norm": 1.7470072507858276, "learning_rate": 8.943510209209971e-06, "loss": 1.6124, "step": 24249 }, { "epoch": 0.8684441420309059, "grad_norm": 1.491672396659851, "learning_rate": 8.93871618252553e-06, "loss": 1.2757, "step": 24250 }, { "epoch": 0.8684799541604742, "grad_norm": 1.5673433542251587, "learning_rate": 8.933923380953224e-06, "loss": 1.0436, "step": 24251 }, { "epoch": 0.8685157662900425, "grad_norm": 1.1775404214859009, "learning_rate": 8.92913180455749e-06, "loss": 1.2953, "step": 24252 }, { "epoch": 0.8685515784196107, "grad_norm": 1.688716173171997, "learning_rate": 8.924341453402817e-06, "loss": 1.3285, "step": 24253 }, { "epoch": 0.868587390549179, "grad_norm": 1.6891595125198364, "learning_rate": 8.919552327553648e-06, "loss": 1.5579, "step": 24254 }, { "epoch": 0.8686232026787473, "grad_norm": 1.63966703414917, "learning_rate": 8.914764427074428e-06, "loss": 1.4901, "step": 24255 }, { "epoch": 0.8686590148083155, "grad_norm": 1.675835371017456, "learning_rate": 8.909977752029574e-06, "loss": 1.3802, "step": 24256 }, { "epoch": 0.8686948269378839, "grad_norm": 2.029541492462158, "learning_rate": 8.905192302483433e-06, "loss": 1.4674, "step": 24257 }, { "epoch": 0.8687306390674522, "grad_norm": 1.7141716480255127, "learning_rate": 8.900408078500454e-06, "loss": 1.4041, "step": 24258 }, { "epoch": 0.8687664511970205, "grad_norm": 1.9804600477218628, "learning_rate": 8.895625080144965e-06, "loss": 1.4072, "step": 24259 }, { "epoch": 0.8688022633265887, "grad_norm": 1.474840760231018, "learning_rate": 8.890843307481322e-06, "loss": 1.3555, "step": 24260 }, { "epoch": 0.868838075456157, "grad_norm": 1.8305799961090088, "learning_rate": 8.886062760573854e-06, "loss": 1.4299, "step": 24261 }, { "epoch": 0.8688738875857253, "grad_norm": 1.6935603618621826, "learning_rate": 8.88128343948691e-06, "loss": 1.416, "step": 24262 }, { "epoch": 0.8689096997152935, "grad_norm": 1.5021482706069946, "learning_rate": 8.876505344284758e-06, "loss": 1.318, "step": 24263 }, { "epoch": 0.8689455118448619, "grad_norm": 2.0541741847991943, "learning_rate": 8.871728475031649e-06, "loss": 1.6846, "step": 24264 }, { "epoch": 0.8689813239744302, "grad_norm": 1.6807490587234497, "learning_rate": 8.86695283179192e-06, "loss": 1.2177, "step": 24265 }, { "epoch": 0.8690171361039984, "grad_norm": 1.686801552772522, "learning_rate": 8.862178414629774e-06, "loss": 1.6565, "step": 24266 }, { "epoch": 0.8690529482335667, "grad_norm": 1.6299697160720825, "learning_rate": 8.857405223609472e-06, "loss": 1.5448, "step": 24267 }, { "epoch": 0.869088760363135, "grad_norm": 2.021028518676758, "learning_rate": 8.852633258795185e-06, "loss": 1.4393, "step": 24268 }, { "epoch": 0.8691245724927033, "grad_norm": 1.4445551633834839, "learning_rate": 8.847862520251182e-06, "loss": 1.1612, "step": 24269 }, { "epoch": 0.8691603846222715, "grad_norm": 1.3987027406692505, "learning_rate": 8.843093008041591e-06, "loss": 1.5816, "step": 24270 }, { "epoch": 0.8691961967518399, "grad_norm": 1.6368407011032104, "learning_rate": 8.838324722230595e-06, "loss": 1.1717, "step": 24271 }, { "epoch": 0.8692320088814082, "grad_norm": 1.2989962100982666, "learning_rate": 8.833557662882374e-06, "loss": 1.4289, "step": 24272 }, { "epoch": 0.8692678210109764, "grad_norm": 1.8490816354751587, "learning_rate": 8.828791830061022e-06, "loss": 1.2979, "step": 24273 }, { "epoch": 0.8693036331405447, "grad_norm": 1.353345274925232, "learning_rate": 8.824027223830688e-06, "loss": 1.4558, "step": 24274 }, { "epoch": 0.869339445270113, "grad_norm": 1.5162136554718018, "learning_rate": 8.819263844255432e-06, "loss": 1.5938, "step": 24275 }, { "epoch": 0.8693752573996812, "grad_norm": 1.5665966272354126, "learning_rate": 8.81450169139939e-06, "loss": 1.3911, "step": 24276 }, { "epoch": 0.8694110695292495, "grad_norm": 1.4457532167434692, "learning_rate": 8.809740765326591e-06, "loss": 1.2053, "step": 24277 }, { "epoch": 0.8694468816588179, "grad_norm": 2.0011956691741943, "learning_rate": 8.804981066101126e-06, "loss": 1.682, "step": 24278 }, { "epoch": 0.8694826937883862, "grad_norm": 1.6146661043167114, "learning_rate": 8.800222593786967e-06, "loss": 1.6388, "step": 24279 }, { "epoch": 0.8695185059179544, "grad_norm": 1.7931370735168457, "learning_rate": 8.795465348448218e-06, "loss": 1.3895, "step": 24280 }, { "epoch": 0.8695543180475227, "grad_norm": 3.107492446899414, "learning_rate": 8.790709330148828e-06, "loss": 1.2655, "step": 24281 }, { "epoch": 0.869590130177091, "grad_norm": 2.4997458457946777, "learning_rate": 8.78595453895278e-06, "loss": 1.363, "step": 24282 }, { "epoch": 0.8696259423066592, "grad_norm": 1.7735134363174438, "learning_rate": 8.781200974924053e-06, "loss": 1.5793, "step": 24283 }, { "epoch": 0.8696617544362275, "grad_norm": 1.5544660091400146, "learning_rate": 8.7764486381266e-06, "loss": 1.3186, "step": 24284 }, { "epoch": 0.8696975665657959, "grad_norm": 1.9983875751495361, "learning_rate": 8.77169752862439e-06, "loss": 1.5833, "step": 24285 }, { "epoch": 0.8697333786953642, "grad_norm": 1.391190767288208, "learning_rate": 8.76694764648126e-06, "loss": 1.5987, "step": 24286 }, { "epoch": 0.8697691908249324, "grad_norm": 1.8695008754730225, "learning_rate": 8.762198991761217e-06, "loss": 1.2977, "step": 24287 }, { "epoch": 0.8698050029545007, "grad_norm": 1.3335654735565186, "learning_rate": 8.757451564528074e-06, "loss": 1.2562, "step": 24288 }, { "epoch": 0.869840815084069, "grad_norm": 1.5261073112487793, "learning_rate": 8.752705364845748e-06, "loss": 1.3221, "step": 24289 }, { "epoch": 0.8698766272136372, "grad_norm": 1.8547850847244263, "learning_rate": 8.747960392778053e-06, "loss": 1.3653, "step": 24290 }, { "epoch": 0.8699124393432055, "grad_norm": 1.607026219367981, "learning_rate": 8.74321664838884e-06, "loss": 1.3366, "step": 24291 }, { "epoch": 0.8699482514727739, "grad_norm": 1.540658950805664, "learning_rate": 8.738474131741958e-06, "loss": 1.1223, "step": 24292 }, { "epoch": 0.8699840636023422, "grad_norm": 2.3103864192962646, "learning_rate": 8.733732842901166e-06, "loss": 1.3668, "step": 24293 }, { "epoch": 0.8700198757319104, "grad_norm": 1.749535322189331, "learning_rate": 8.728992781930278e-06, "loss": 1.3763, "step": 24294 }, { "epoch": 0.8700556878614787, "grad_norm": 1.4846516847610474, "learning_rate": 8.724253948893057e-06, "loss": 1.4243, "step": 24295 }, { "epoch": 0.870091499991047, "grad_norm": 1.5358998775482178, "learning_rate": 8.719516343853273e-06, "loss": 1.5699, "step": 24296 }, { "epoch": 0.8701273121206152, "grad_norm": 1.1911826133728027, "learning_rate": 8.71477996687463e-06, "loss": 1.5351, "step": 24297 }, { "epoch": 0.8701631242501835, "grad_norm": 2.5798017978668213, "learning_rate": 8.710044818020902e-06, "loss": 1.2529, "step": 24298 }, { "epoch": 0.8701989363797519, "grad_norm": 1.7193776369094849, "learning_rate": 8.705310897355768e-06, "loss": 1.4123, "step": 24299 }, { "epoch": 0.8702347485093201, "grad_norm": 1.6127065420150757, "learning_rate": 8.700578204942889e-06, "loss": 1.6177, "step": 24300 }, { "epoch": 0.8702705606388884, "grad_norm": 1.6372747421264648, "learning_rate": 8.69584674084597e-06, "loss": 0.992, "step": 24301 }, { "epoch": 0.8703063727684567, "grad_norm": 1.3636530637741089, "learning_rate": 8.69111650512866e-06, "loss": 1.3641, "step": 24302 }, { "epoch": 0.870342184898025, "grad_norm": 1.8584938049316406, "learning_rate": 8.686387497854609e-06, "loss": 1.3151, "step": 24303 }, { "epoch": 0.8703779970275932, "grad_norm": 1.4046014547348022, "learning_rate": 8.681659719087421e-06, "loss": 1.7345, "step": 24304 }, { "epoch": 0.8704138091571615, "grad_norm": 1.3433245420455933, "learning_rate": 8.676933168890699e-06, "loss": 1.2366, "step": 24305 }, { "epoch": 0.8704496212867299, "grad_norm": 2.0000412464141846, "learning_rate": 8.67220784732804e-06, "loss": 1.1339, "step": 24306 }, { "epoch": 0.8704854334162981, "grad_norm": 1.5002861022949219, "learning_rate": 8.667483754463046e-06, "loss": 1.4521, "step": 24307 }, { "epoch": 0.8705212455458664, "grad_norm": 1.84786856174469, "learning_rate": 8.662760890359233e-06, "loss": 1.2317, "step": 24308 }, { "epoch": 0.8705570576754347, "grad_norm": 1.4311268329620361, "learning_rate": 8.658039255080153e-06, "loss": 1.4301, "step": 24309 }, { "epoch": 0.870592869805003, "grad_norm": 1.6267906427383423, "learning_rate": 8.65331884868934e-06, "loss": 1.6193, "step": 24310 }, { "epoch": 0.8706286819345712, "grad_norm": 1.5863173007965088, "learning_rate": 8.64859967125029e-06, "loss": 1.4066, "step": 24311 }, { "epoch": 0.8706644940641395, "grad_norm": 1.7492663860321045, "learning_rate": 8.643881722826486e-06, "loss": 1.1955, "step": 24312 }, { "epoch": 0.8707003061937079, "grad_norm": 1.6891770362854004, "learning_rate": 8.639165003481408e-06, "loss": 1.2417, "step": 24313 }, { "epoch": 0.8707361183232761, "grad_norm": 1.444863200187683, "learning_rate": 8.634449513278553e-06, "loss": 1.4051, "step": 24314 }, { "epoch": 0.8707719304528444, "grad_norm": 2.0153067111968994, "learning_rate": 8.629735252281301e-06, "loss": 1.3517, "step": 24315 }, { "epoch": 0.8708077425824127, "grad_norm": 1.9217655658721924, "learning_rate": 8.625022220553091e-06, "loss": 1.282, "step": 24316 }, { "epoch": 0.8708435547119809, "grad_norm": 1.494198203086853, "learning_rate": 8.620310418157374e-06, "loss": 1.2652, "step": 24317 }, { "epoch": 0.8708793668415492, "grad_norm": 1.5170859098434448, "learning_rate": 8.615599845157484e-06, "loss": 1.7015, "step": 24318 }, { "epoch": 0.8709151789711175, "grad_norm": 1.4580488204956055, "learning_rate": 8.61089050161683e-06, "loss": 1.7065, "step": 24319 }, { "epoch": 0.8709509911006859, "grad_norm": 2.421257734298706, "learning_rate": 8.60618238759875e-06, "loss": 1.3544, "step": 24320 }, { "epoch": 0.8709868032302541, "grad_norm": 1.232548713684082, "learning_rate": 8.601475503166623e-06, "loss": 1.237, "step": 24321 }, { "epoch": 0.8710226153598224, "grad_norm": 1.703403115272522, "learning_rate": 8.596769848383723e-06, "loss": 1.4257, "step": 24322 }, { "epoch": 0.8710584274893907, "grad_norm": 1.6034023761749268, "learning_rate": 8.592065423313378e-06, "loss": 1.2383, "step": 24323 }, { "epoch": 0.8710942396189589, "grad_norm": 1.508851408958435, "learning_rate": 8.587362228018892e-06, "loss": 1.5151, "step": 24324 }, { "epoch": 0.8711300517485272, "grad_norm": 1.593440055847168, "learning_rate": 8.582660262563558e-06, "loss": 1.5984, "step": 24325 }, { "epoch": 0.8711658638780955, "grad_norm": 2.054398775100708, "learning_rate": 8.577959527010582e-06, "loss": 1.169, "step": 24326 }, { "epoch": 0.8712016760076638, "grad_norm": 1.829206943511963, "learning_rate": 8.573260021423236e-06, "loss": 1.2653, "step": 24327 }, { "epoch": 0.8712374881372321, "grad_norm": 1.868972897529602, "learning_rate": 8.568561745864766e-06, "loss": 1.5099, "step": 24328 }, { "epoch": 0.8712733002668004, "grad_norm": 1.5950393676757812, "learning_rate": 8.563864700398338e-06, "loss": 1.6054, "step": 24329 }, { "epoch": 0.8713091123963687, "grad_norm": 1.582476258277893, "learning_rate": 8.559168885087165e-06, "loss": 1.603, "step": 24330 }, { "epoch": 0.8713449245259369, "grad_norm": 1.48959481716156, "learning_rate": 8.554474299994431e-06, "loss": 1.4577, "step": 24331 }, { "epoch": 0.8713807366555052, "grad_norm": 1.9258126020431519, "learning_rate": 8.549780945183306e-06, "loss": 1.4982, "step": 24332 }, { "epoch": 0.8714165487850735, "grad_norm": 2.175291061401367, "learning_rate": 8.545088820716895e-06, "loss": 1.5243, "step": 24333 }, { "epoch": 0.8714523609146418, "grad_norm": 1.4352703094482422, "learning_rate": 8.54039792665835e-06, "loss": 1.2539, "step": 24334 }, { "epoch": 0.8714881730442101, "grad_norm": 1.5487613677978516, "learning_rate": 8.535708263070785e-06, "loss": 1.5298, "step": 24335 }, { "epoch": 0.8715239851737784, "grad_norm": 1.46132230758667, "learning_rate": 8.531019830017272e-06, "loss": 1.4797, "step": 24336 }, { "epoch": 0.8715597973033467, "grad_norm": 1.3812408447265625, "learning_rate": 8.526332627560906e-06, "loss": 1.1609, "step": 24337 }, { "epoch": 0.8715956094329149, "grad_norm": 1.4614298343658447, "learning_rate": 8.521646655764736e-06, "loss": 1.2881, "step": 24338 }, { "epoch": 0.8716314215624832, "grad_norm": 1.8450669050216675, "learning_rate": 8.516961914691835e-06, "loss": 1.1969, "step": 24339 }, { "epoch": 0.8716672336920515, "grad_norm": 2.6135294437408447, "learning_rate": 8.512278404405182e-06, "loss": 1.5435, "step": 24340 }, { "epoch": 0.8717030458216198, "grad_norm": 1.3671050071716309, "learning_rate": 8.507596124967821e-06, "loss": 1.4997, "step": 24341 }, { "epoch": 0.8717388579511881, "grad_norm": 1.8961228132247925, "learning_rate": 8.50291507644273e-06, "loss": 1.3494, "step": 24342 }, { "epoch": 0.8717746700807564, "grad_norm": 1.6030116081237793, "learning_rate": 8.498235258892907e-06, "loss": 1.6106, "step": 24343 }, { "epoch": 0.8718104822103246, "grad_norm": 1.4362305402755737, "learning_rate": 8.493556672381297e-06, "loss": 1.4159, "step": 24344 }, { "epoch": 0.8718462943398929, "grad_norm": 1.694785237312317, "learning_rate": 8.488879316970832e-06, "loss": 1.3855, "step": 24345 }, { "epoch": 0.8718821064694612, "grad_norm": 1.857426643371582, "learning_rate": 8.484203192724482e-06, "loss": 1.4179, "step": 24346 }, { "epoch": 0.8719179185990295, "grad_norm": 1.4686394929885864, "learning_rate": 8.479528299705108e-06, "loss": 1.217, "step": 24347 }, { "epoch": 0.8719537307285978, "grad_norm": 1.5443724393844604, "learning_rate": 8.474854637975638e-06, "loss": 1.3838, "step": 24348 }, { "epoch": 0.8719895428581661, "grad_norm": 1.604174017906189, "learning_rate": 8.47018220759893e-06, "loss": 1.8491, "step": 24349 }, { "epoch": 0.8720253549877344, "grad_norm": 1.6408330202102661, "learning_rate": 8.465511008637872e-06, "loss": 1.503, "step": 24350 }, { "epoch": 0.8720611671173026, "grad_norm": 2.2034032344818115, "learning_rate": 8.460841041155277e-06, "loss": 1.7287, "step": 24351 }, { "epoch": 0.8720969792468709, "grad_norm": 2.2767324447631836, "learning_rate": 8.456172305213995e-06, "loss": 1.2505, "step": 24352 }, { "epoch": 0.8721327913764392, "grad_norm": 1.8010529279708862, "learning_rate": 8.45150480087684e-06, "loss": 1.4255, "step": 24353 }, { "epoch": 0.8721686035060074, "grad_norm": 1.6472227573394775, "learning_rate": 8.44683852820659e-06, "loss": 1.5661, "step": 24354 }, { "epoch": 0.8722044156355758, "grad_norm": 1.3859483003616333, "learning_rate": 8.442173487266047e-06, "loss": 1.047, "step": 24355 }, { "epoch": 0.8722402277651441, "grad_norm": 1.3888520002365112, "learning_rate": 8.437509678117916e-06, "loss": 1.2829, "step": 24356 }, { "epoch": 0.8722760398947124, "grad_norm": 1.3679659366607666, "learning_rate": 8.432847100825025e-06, "loss": 1.2947, "step": 24357 }, { "epoch": 0.8723118520242806, "grad_norm": 2.372196912765503, "learning_rate": 8.428185755450047e-06, "loss": 1.318, "step": 24358 }, { "epoch": 0.8723476641538489, "grad_norm": 1.549730896949768, "learning_rate": 8.423525642055719e-06, "loss": 1.5575, "step": 24359 }, { "epoch": 0.8723834762834172, "grad_norm": 1.8781365156173706, "learning_rate": 8.418866760704735e-06, "loss": 1.4678, "step": 24360 }, { "epoch": 0.8724192884129854, "grad_norm": 2.044919729232788, "learning_rate": 8.414209111459747e-06, "loss": 1.4384, "step": 24361 }, { "epoch": 0.8724551005425537, "grad_norm": 1.8532249927520752, "learning_rate": 8.409552694383472e-06, "loss": 1.1847, "step": 24362 }, { "epoch": 0.8724909126721221, "grad_norm": 1.6493250131607056, "learning_rate": 8.404897509538468e-06, "loss": 1.3719, "step": 24363 }, { "epoch": 0.8725267248016904, "grad_norm": 1.678415298461914, "learning_rate": 8.400243556987464e-06, "loss": 1.0674, "step": 24364 }, { "epoch": 0.8725625369312586, "grad_norm": 1.7217615842819214, "learning_rate": 8.39559083679301e-06, "loss": 1.647, "step": 24365 }, { "epoch": 0.8725983490608269, "grad_norm": 1.3149148225784302, "learning_rate": 8.390939349017735e-06, "loss": 1.2332, "step": 24366 }, { "epoch": 0.8726341611903952, "grad_norm": 1.6030855178833008, "learning_rate": 8.386289093724175e-06, "loss": 1.3858, "step": 24367 }, { "epoch": 0.8726699733199634, "grad_norm": 1.845257043838501, "learning_rate": 8.38164007097495e-06, "loss": 1.278, "step": 24368 }, { "epoch": 0.8727057854495317, "grad_norm": 1.8243887424468994, "learning_rate": 8.376992280832574e-06, "loss": 1.4684, "step": 24369 }, { "epoch": 0.8727415975791001, "grad_norm": 1.6303426027297974, "learning_rate": 8.372345723359553e-06, "loss": 1.6383, "step": 24370 }, { "epoch": 0.8727774097086684, "grad_norm": 1.4345486164093018, "learning_rate": 8.367700398618472e-06, "loss": 1.2597, "step": 24371 }, { "epoch": 0.8728132218382366, "grad_norm": 1.5594035387039185, "learning_rate": 8.363056306671757e-06, "loss": 1.1994, "step": 24372 }, { "epoch": 0.8728490339678049, "grad_norm": 1.6975489854812622, "learning_rate": 8.358413447581937e-06, "loss": 1.1322, "step": 24373 }, { "epoch": 0.8728848460973732, "grad_norm": 1.6841044425964355, "learning_rate": 8.353771821411415e-06, "loss": 1.8269, "step": 24374 }, { "epoch": 0.8729206582269414, "grad_norm": 1.4348398447036743, "learning_rate": 8.349131428222723e-06, "loss": 1.4246, "step": 24375 }, { "epoch": 0.8729564703565097, "grad_norm": 2.420299768447876, "learning_rate": 8.344492268078219e-06, "loss": 1.3549, "step": 24376 }, { "epoch": 0.8729922824860781, "grad_norm": 1.3469812870025635, "learning_rate": 8.339854341040376e-06, "loss": 1.4121, "step": 24377 }, { "epoch": 0.8730280946156463, "grad_norm": 1.9128682613372803, "learning_rate": 8.335217647171533e-06, "loss": 1.5137, "step": 24378 }, { "epoch": 0.8730639067452146, "grad_norm": 1.6239603757858276, "learning_rate": 8.330582186534097e-06, "loss": 1.3636, "step": 24379 }, { "epoch": 0.8730997188747829, "grad_norm": 1.9807374477386475, "learning_rate": 8.32594795919045e-06, "loss": 1.7255, "step": 24380 }, { "epoch": 0.8731355310043512, "grad_norm": 1.6199429035186768, "learning_rate": 8.321314965202898e-06, "loss": 1.3875, "step": 24381 }, { "epoch": 0.8731713431339194, "grad_norm": 1.5612422227859497, "learning_rate": 8.316683204633814e-06, "loss": 1.3494, "step": 24382 }, { "epoch": 0.8732071552634877, "grad_norm": 2.1652863025665283, "learning_rate": 8.312052677545478e-06, "loss": 1.4789, "step": 24383 }, { "epoch": 0.8732429673930561, "grad_norm": 1.5461417436599731, "learning_rate": 8.307423384000224e-06, "loss": 1.6285, "step": 24384 }, { "epoch": 0.8732787795226243, "grad_norm": 1.2789685726165771, "learning_rate": 8.302795324060287e-06, "loss": 1.2883, "step": 24385 }, { "epoch": 0.8733145916521926, "grad_norm": 2.073131799697876, "learning_rate": 8.298168497787984e-06, "loss": 1.3102, "step": 24386 }, { "epoch": 0.8733504037817609, "grad_norm": 1.3319644927978516, "learning_rate": 8.293542905245543e-06, "loss": 1.1161, "step": 24387 }, { "epoch": 0.8733862159113291, "grad_norm": 1.5022635459899902, "learning_rate": 8.288918546495172e-06, "loss": 1.5562, "step": 24388 }, { "epoch": 0.8734220280408974, "grad_norm": 2.067774534225464, "learning_rate": 8.284295421599097e-06, "loss": 1.5094, "step": 24389 }, { "epoch": 0.8734578401704657, "grad_norm": 1.600555658340454, "learning_rate": 8.279673530619525e-06, "loss": 1.6114, "step": 24390 }, { "epoch": 0.8734936523000341, "grad_norm": 1.613824486732483, "learning_rate": 8.27505287361866e-06, "loss": 1.443, "step": 24391 }, { "epoch": 0.8735294644296023, "grad_norm": 1.7431727647781372, "learning_rate": 8.270433450658621e-06, "loss": 1.5098, "step": 24392 }, { "epoch": 0.8735652765591706, "grad_norm": 1.3850111961364746, "learning_rate": 8.265815261801568e-06, "loss": 1.5287, "step": 24393 }, { "epoch": 0.8736010886887389, "grad_norm": 1.909886360168457, "learning_rate": 8.261198307109651e-06, "loss": 1.533, "step": 24394 }, { "epoch": 0.8736369008183071, "grad_norm": 1.8211636543273926, "learning_rate": 8.25658258664499e-06, "loss": 1.6334, "step": 24395 }, { "epoch": 0.8736727129478754, "grad_norm": 1.3177467584609985, "learning_rate": 8.251968100469653e-06, "loss": 1.3133, "step": 24396 }, { "epoch": 0.8737085250774437, "grad_norm": 1.4866849184036255, "learning_rate": 8.247354848645738e-06, "loss": 1.5015, "step": 24397 }, { "epoch": 0.8737443372070121, "grad_norm": 1.5447543859481812, "learning_rate": 8.242742831235339e-06, "loss": 1.5953, "step": 24398 }, { "epoch": 0.8737801493365803, "grad_norm": 1.414366602897644, "learning_rate": 8.23813204830045e-06, "loss": 1.1296, "step": 24399 }, { "epoch": 0.8738159614661486, "grad_norm": 1.600353479385376, "learning_rate": 8.233522499903123e-06, "loss": 1.287, "step": 24400 }, { "epoch": 0.8738517735957169, "grad_norm": 1.3291378021240234, "learning_rate": 8.228914186105397e-06, "loss": 1.2978, "step": 24401 }, { "epoch": 0.8738875857252851, "grad_norm": 1.3929870128631592, "learning_rate": 8.224307106969264e-06, "loss": 1.3377, "step": 24402 }, { "epoch": 0.8739233978548534, "grad_norm": 1.5553607940673828, "learning_rate": 8.219701262556678e-06, "loss": 1.3124, "step": 24403 }, { "epoch": 0.8739592099844217, "grad_norm": 1.6648629903793335, "learning_rate": 8.21509665292962e-06, "loss": 1.2963, "step": 24404 }, { "epoch": 0.87399502211399, "grad_norm": 1.5509344339370728, "learning_rate": 8.210493278150066e-06, "loss": 1.3433, "step": 24405 }, { "epoch": 0.8740308342435583, "grad_norm": 1.741041660308838, "learning_rate": 8.205891138279898e-06, "loss": 1.3943, "step": 24406 }, { "epoch": 0.8740666463731266, "grad_norm": 1.877867341041565, "learning_rate": 8.201290233381075e-06, "loss": 1.662, "step": 24407 }, { "epoch": 0.8741024585026949, "grad_norm": 1.2536922693252563, "learning_rate": 8.196690563515463e-06, "loss": 0.8697, "step": 24408 }, { "epoch": 0.8741382706322631, "grad_norm": 2.1341769695281982, "learning_rate": 8.192092128744988e-06, "loss": 1.1999, "step": 24409 }, { "epoch": 0.8741740827618314, "grad_norm": 1.4079196453094482, "learning_rate": 8.187494929131478e-06, "loss": 1.447, "step": 24410 }, { "epoch": 0.8742098948913997, "grad_norm": 1.608680009841919, "learning_rate": 8.182898964736785e-06, "loss": 1.5774, "step": 24411 }, { "epoch": 0.874245707020968, "grad_norm": 1.5955612659454346, "learning_rate": 8.178304235622758e-06, "loss": 1.735, "step": 24412 }, { "epoch": 0.8742815191505363, "grad_norm": 1.7357336282730103, "learning_rate": 8.173710741851215e-06, "loss": 1.4914, "step": 24413 }, { "epoch": 0.8743173312801046, "grad_norm": 1.8703807592391968, "learning_rate": 8.169118483483928e-06, "loss": 1.552, "step": 24414 }, { "epoch": 0.8743531434096729, "grad_norm": 1.617150902748108, "learning_rate": 8.164527460582705e-06, "loss": 1.4209, "step": 24415 }, { "epoch": 0.8743889555392411, "grad_norm": 2.064117670059204, "learning_rate": 8.159937673209327e-06, "loss": 1.6424, "step": 24416 }, { "epoch": 0.8744247676688094, "grad_norm": 1.8013323545455933, "learning_rate": 8.155349121425504e-06, "loss": 1.4639, "step": 24417 }, { "epoch": 0.8744605797983777, "grad_norm": 1.68295419216156, "learning_rate": 8.150761805292983e-06, "loss": 1.3943, "step": 24418 }, { "epoch": 0.874496391927946, "grad_norm": 1.8984606266021729, "learning_rate": 8.146175724873485e-06, "loss": 1.3342, "step": 24419 }, { "epoch": 0.8745322040575143, "grad_norm": 1.3739839792251587, "learning_rate": 8.141590880228722e-06, "loss": 1.4143, "step": 24420 }, { "epoch": 0.8745680161870826, "grad_norm": 1.4901645183563232, "learning_rate": 8.137007271420349e-06, "loss": 1.2584, "step": 24421 }, { "epoch": 0.8746038283166508, "grad_norm": 1.6518315076828003, "learning_rate": 8.132424898510061e-06, "loss": 1.285, "step": 24422 }, { "epoch": 0.8746396404462191, "grad_norm": 1.4844005107879639, "learning_rate": 8.127843761559506e-06, "loss": 1.5776, "step": 24423 }, { "epoch": 0.8746754525757874, "grad_norm": 2.039109945297241, "learning_rate": 8.123263860630282e-06, "loss": 1.5137, "step": 24424 }, { "epoch": 0.8747112647053557, "grad_norm": 1.6008793115615845, "learning_rate": 8.118685195784037e-06, "loss": 1.2386, "step": 24425 }, { "epoch": 0.874747076834924, "grad_norm": 1.5261561870574951, "learning_rate": 8.114107767082358e-06, "loss": 1.5291, "step": 24426 }, { "epoch": 0.8747828889644923, "grad_norm": 1.8413728475570679, "learning_rate": 8.109531574586859e-06, "loss": 1.0062, "step": 24427 }, { "epoch": 0.8748187010940606, "grad_norm": 2.2254388332366943, "learning_rate": 8.10495661835906e-06, "loss": 1.4563, "step": 24428 }, { "epoch": 0.8748545132236288, "grad_norm": 2.0652709007263184, "learning_rate": 8.100382898460546e-06, "loss": 1.7514, "step": 24429 }, { "epoch": 0.8748903253531971, "grad_norm": 1.7497395277023315, "learning_rate": 8.095810414952832e-06, "loss": 1.2252, "step": 24430 }, { "epoch": 0.8749261374827654, "grad_norm": 1.9762898683547974, "learning_rate": 8.091239167897446e-06, "loss": 1.4887, "step": 24431 }, { "epoch": 0.8749619496123336, "grad_norm": 1.4887042045593262, "learning_rate": 8.086669157355876e-06, "loss": 1.6301, "step": 24432 }, { "epoch": 0.874997761741902, "grad_norm": 1.460737705230713, "learning_rate": 8.082100383389613e-06, "loss": 1.2551, "step": 24433 }, { "epoch": 0.8750335738714703, "grad_norm": 1.5650736093521118, "learning_rate": 8.077532846060143e-06, "loss": 1.298, "step": 24434 }, { "epoch": 0.8750693860010386, "grad_norm": 1.7930569648742676, "learning_rate": 8.072966545428873e-06, "loss": 1.467, "step": 24435 }, { "epoch": 0.8751051981306068, "grad_norm": 2.4084696769714355, "learning_rate": 8.068401481557263e-06, "loss": 1.3533, "step": 24436 }, { "epoch": 0.8751410102601751, "grad_norm": 1.8376762866973877, "learning_rate": 8.063837654506734e-06, "loss": 1.3193, "step": 24437 }, { "epoch": 0.8751768223897434, "grad_norm": 1.538162350654602, "learning_rate": 8.059275064338689e-06, "loss": 1.3677, "step": 24438 }, { "epoch": 0.8752126345193116, "grad_norm": 1.8487403392791748, "learning_rate": 8.054713711114491e-06, "loss": 1.4857, "step": 24439 }, { "epoch": 0.87524844664888, "grad_norm": 2.232276678085327, "learning_rate": 8.050153594895526e-06, "loss": 1.6025, "step": 24440 }, { "epoch": 0.8752842587784483, "grad_norm": 1.5102007389068604, "learning_rate": 8.045594715743144e-06, "loss": 0.9663, "step": 24441 }, { "epoch": 0.8753200709080166, "grad_norm": 1.7949094772338867, "learning_rate": 8.04103707371866e-06, "loss": 1.3811, "step": 24442 }, { "epoch": 0.8753558830375848, "grad_norm": 1.8622803688049316, "learning_rate": 8.036480668883394e-06, "loss": 1.3921, "step": 24443 }, { "epoch": 0.8753916951671531, "grad_norm": 1.5665391683578491, "learning_rate": 8.031925501298666e-06, "loss": 1.338, "step": 24444 }, { "epoch": 0.8754275072967214, "grad_norm": 1.5506495237350464, "learning_rate": 8.027371571025765e-06, "loss": 1.5878, "step": 24445 }, { "epoch": 0.8754633194262896, "grad_norm": 1.6303693056106567, "learning_rate": 8.022818878125926e-06, "loss": 1.5915, "step": 24446 }, { "epoch": 0.875499131555858, "grad_norm": 1.6259897947311401, "learning_rate": 8.018267422660419e-06, "loss": 1.731, "step": 24447 }, { "epoch": 0.8755349436854263, "grad_norm": 1.2927833795547485, "learning_rate": 8.013717204690474e-06, "loss": 1.2528, "step": 24448 }, { "epoch": 0.8755707558149946, "grad_norm": 1.6778570413589478, "learning_rate": 8.00916822427733e-06, "loss": 1.3148, "step": 24449 }, { "epoch": 0.8756065679445628, "grad_norm": 1.671055555343628, "learning_rate": 8.004620481482161e-06, "loss": 1.2224, "step": 24450 }, { "epoch": 0.8756423800741311, "grad_norm": 2.1273000240325928, "learning_rate": 8.00007397636613e-06, "loss": 1.5564, "step": 24451 }, { "epoch": 0.8756781922036994, "grad_norm": 1.361875295639038, "learning_rate": 7.995528708990463e-06, "loss": 1.5403, "step": 24452 }, { "epoch": 0.8757140043332676, "grad_norm": 1.7198835611343384, "learning_rate": 7.990984679416269e-06, "loss": 1.28, "step": 24453 }, { "epoch": 0.875749816462836, "grad_norm": 1.4493197202682495, "learning_rate": 7.986441887704687e-06, "loss": 1.2897, "step": 24454 }, { "epoch": 0.8757856285924043, "grad_norm": 1.5954997539520264, "learning_rate": 7.981900333916848e-06, "loss": 1.2537, "step": 24455 }, { "epoch": 0.8758214407219725, "grad_norm": 1.679919958114624, "learning_rate": 7.977360018113855e-06, "loss": 1.438, "step": 24456 }, { "epoch": 0.8758572528515408, "grad_norm": 1.6092506647109985, "learning_rate": 7.972820940356785e-06, "loss": 1.3889, "step": 24457 }, { "epoch": 0.8758930649811091, "grad_norm": 2.2504239082336426, "learning_rate": 7.968283100706664e-06, "loss": 1.4879, "step": 24458 }, { "epoch": 0.8759288771106774, "grad_norm": 2.6346192359924316, "learning_rate": 7.963746499224611e-06, "loss": 1.4057, "step": 24459 }, { "epoch": 0.8759646892402456, "grad_norm": 2.1243059635162354, "learning_rate": 7.959211135971622e-06, "loss": 1.4884, "step": 24460 }, { "epoch": 0.876000501369814, "grad_norm": 1.6332237720489502, "learning_rate": 7.954677011008749e-06, "loss": 1.1818, "step": 24461 }, { "epoch": 0.8760363134993823, "grad_norm": 1.5698171854019165, "learning_rate": 7.95014412439692e-06, "loss": 1.4265, "step": 24462 }, { "epoch": 0.8760721256289505, "grad_norm": 1.746675729751587, "learning_rate": 7.945612476197207e-06, "loss": 1.6762, "step": 24463 }, { "epoch": 0.8761079377585188, "grad_norm": 1.5642447471618652, "learning_rate": 7.941082066470507e-06, "loss": 1.2722, "step": 24464 }, { "epoch": 0.8761437498880871, "grad_norm": 2.2350451946258545, "learning_rate": 7.936552895277826e-06, "loss": 1.7897, "step": 24465 }, { "epoch": 0.8761795620176553, "grad_norm": 2.5448694229125977, "learning_rate": 7.932024962680062e-06, "loss": 1.3325, "step": 24466 }, { "epoch": 0.8762153741472236, "grad_norm": 1.8758574724197388, "learning_rate": 7.927498268738132e-06, "loss": 1.0997, "step": 24467 }, { "epoch": 0.876251186276792, "grad_norm": 2.587082624435425, "learning_rate": 7.922972813512974e-06, "loss": 1.5028, "step": 24468 }, { "epoch": 0.8762869984063603, "grad_norm": 1.386791467666626, "learning_rate": 7.918448597065408e-06, "loss": 1.5839, "step": 24469 }, { "epoch": 0.8763228105359285, "grad_norm": 1.7087793350219727, "learning_rate": 7.913925619456374e-06, "loss": 1.6286, "step": 24470 }, { "epoch": 0.8763586226654968, "grad_norm": 1.446853756904602, "learning_rate": 7.909403880746669e-06, "loss": 1.3785, "step": 24471 }, { "epoch": 0.8763944347950651, "grad_norm": 2.251840829849243, "learning_rate": 7.904883380997164e-06, "loss": 1.453, "step": 24472 }, { "epoch": 0.8764302469246333, "grad_norm": 1.5899436473846436, "learning_rate": 7.900364120268622e-06, "loss": 1.61, "step": 24473 }, { "epoch": 0.8764660590542016, "grad_norm": 1.766593098640442, "learning_rate": 7.895846098621917e-06, "loss": 1.1907, "step": 24474 }, { "epoch": 0.87650187118377, "grad_norm": 1.9567400217056274, "learning_rate": 7.891329316117801e-06, "loss": 1.363, "step": 24475 }, { "epoch": 0.8765376833133383, "grad_norm": 1.5550438165664673, "learning_rate": 7.886813772817026e-06, "loss": 1.1627, "step": 24476 }, { "epoch": 0.8765734954429065, "grad_norm": 1.8496496677398682, "learning_rate": 7.88229946878034e-06, "loss": 1.1868, "step": 24477 }, { "epoch": 0.8766093075724748, "grad_norm": 1.919206142425537, "learning_rate": 7.877786404068498e-06, "loss": 1.7735, "step": 24478 }, { "epoch": 0.8766451197020431, "grad_norm": 1.3942233324050903, "learning_rate": 7.873274578742229e-06, "loss": 1.4318, "step": 24479 }, { "epoch": 0.8766809318316113, "grad_norm": 1.6327040195465088, "learning_rate": 7.868763992862182e-06, "loss": 1.1995, "step": 24480 }, { "epoch": 0.8767167439611796, "grad_norm": 1.5736989974975586, "learning_rate": 7.864254646489099e-06, "loss": 1.1477, "step": 24481 }, { "epoch": 0.876752556090748, "grad_norm": 1.382622480392456, "learning_rate": 7.859746539683621e-06, "loss": 1.565, "step": 24482 }, { "epoch": 0.8767883682203162, "grad_norm": 1.465562105178833, "learning_rate": 7.855239672506408e-06, "loss": 1.4932, "step": 24483 }, { "epoch": 0.8768241803498845, "grad_norm": 1.737520456314087, "learning_rate": 7.85073404501807e-06, "loss": 1.3897, "step": 24484 }, { "epoch": 0.8768599924794528, "grad_norm": 1.8517922163009644, "learning_rate": 7.846229657279246e-06, "loss": 1.6007, "step": 24485 }, { "epoch": 0.8768958046090211, "grad_norm": 1.5409523248672485, "learning_rate": 7.841726509350545e-06, "loss": 1.3853, "step": 24486 }, { "epoch": 0.8769316167385893, "grad_norm": 1.7995675802230835, "learning_rate": 7.837224601292525e-06, "loss": 1.4295, "step": 24487 }, { "epoch": 0.8769674288681576, "grad_norm": 2.4543213844299316, "learning_rate": 7.832723933165764e-06, "loss": 1.4674, "step": 24488 }, { "epoch": 0.877003240997726, "grad_norm": 1.7001053094863892, "learning_rate": 7.828224505030823e-06, "loss": 1.3251, "step": 24489 }, { "epoch": 0.8770390531272942, "grad_norm": 1.9298815727233887, "learning_rate": 7.823726316948232e-06, "loss": 1.528, "step": 24490 }, { "epoch": 0.8770748652568625, "grad_norm": 1.8918613195419312, "learning_rate": 7.819229368978498e-06, "loss": 1.6044, "step": 24491 }, { "epoch": 0.8771106773864308, "grad_norm": 1.5881575345993042, "learning_rate": 7.814733661182116e-06, "loss": 1.6521, "step": 24492 }, { "epoch": 0.877146489515999, "grad_norm": 1.3574950695037842, "learning_rate": 7.810239193619618e-06, "loss": 1.2039, "step": 24493 }, { "epoch": 0.8771823016455673, "grad_norm": 1.7643013000488281, "learning_rate": 7.805745966351407e-06, "loss": 0.9625, "step": 24494 }, { "epoch": 0.8772181137751356, "grad_norm": 1.515485167503357, "learning_rate": 7.801253979437962e-06, "loss": 1.5384, "step": 24495 }, { "epoch": 0.877253925904704, "grad_norm": 1.3017157316207886, "learning_rate": 7.796763232939719e-06, "loss": 1.2687, "step": 24496 }, { "epoch": 0.8772897380342722, "grad_norm": 1.8666355609893799, "learning_rate": 7.79227372691711e-06, "loss": 1.4063, "step": 24497 }, { "epoch": 0.8773255501638405, "grad_norm": 1.7395696640014648, "learning_rate": 7.787785461430498e-06, "loss": 1.4451, "step": 24498 }, { "epoch": 0.8773613622934088, "grad_norm": 1.4882618188858032, "learning_rate": 7.783298436540288e-06, "loss": 1.4995, "step": 24499 }, { "epoch": 0.877397174422977, "grad_norm": 1.7713781595230103, "learning_rate": 7.778812652306844e-06, "loss": 1.5565, "step": 24500 }, { "epoch": 0.8774329865525453, "grad_norm": 1.427703619003296, "learning_rate": 7.774328108790541e-06, "loss": 1.297, "step": 24501 }, { "epoch": 0.8774687986821136, "grad_norm": 2.4432625770568848, "learning_rate": 7.769844806051674e-06, "loss": 1.7429, "step": 24502 }, { "epoch": 0.877504610811682, "grad_norm": 1.2416493892669678, "learning_rate": 7.765362744150573e-06, "loss": 1.3341, "step": 24503 }, { "epoch": 0.8775404229412502, "grad_norm": 1.4567826986312866, "learning_rate": 7.760881923147567e-06, "loss": 1.5861, "step": 24504 }, { "epoch": 0.8775762350708185, "grad_norm": 1.5808217525482178, "learning_rate": 7.756402343102897e-06, "loss": 1.5537, "step": 24505 }, { "epoch": 0.8776120472003868, "grad_norm": 1.6409419775009155, "learning_rate": 7.751924004076837e-06, "loss": 1.1795, "step": 24506 }, { "epoch": 0.877647859329955, "grad_norm": 1.580722689628601, "learning_rate": 7.747446906129662e-06, "loss": 1.4825, "step": 24507 }, { "epoch": 0.8776836714595233, "grad_norm": 1.7144967317581177, "learning_rate": 7.742971049321601e-06, "loss": 1.392, "step": 24508 }, { "epoch": 0.8777194835890916, "grad_norm": 1.1489810943603516, "learning_rate": 7.738496433712839e-06, "loss": 1.3657, "step": 24509 }, { "epoch": 0.87775529571866, "grad_norm": 1.4573662281036377, "learning_rate": 7.734023059363605e-06, "loss": 1.3468, "step": 24510 }, { "epoch": 0.8777911078482282, "grad_norm": 1.420721173286438, "learning_rate": 7.729550926334094e-06, "loss": 1.3038, "step": 24511 }, { "epoch": 0.8778269199777965, "grad_norm": 1.3883370161056519, "learning_rate": 7.72508003468444e-06, "loss": 1.7398, "step": 24512 }, { "epoch": 0.8778627321073648, "grad_norm": 1.651623010635376, "learning_rate": 7.720610384474802e-06, "loss": 1.4708, "step": 24513 }, { "epoch": 0.877898544236933, "grad_norm": 1.322274088859558, "learning_rate": 7.716141975765322e-06, "loss": 1.4895, "step": 24514 }, { "epoch": 0.8779343563665013, "grad_norm": 1.9884264469146729, "learning_rate": 7.711674808616132e-06, "loss": 1.4892, "step": 24515 }, { "epoch": 0.8779701684960696, "grad_norm": 1.6073400974273682, "learning_rate": 7.70720888308729e-06, "loss": 1.5275, "step": 24516 }, { "epoch": 0.878005980625638, "grad_norm": 2.0732638835906982, "learning_rate": 7.70274419923892e-06, "loss": 1.4096, "step": 24517 }, { "epoch": 0.8780417927552062, "grad_norm": 1.6035428047180176, "learning_rate": 7.69828075713106e-06, "loss": 1.4707, "step": 24518 }, { "epoch": 0.8780776048847745, "grad_norm": 1.8507529497146606, "learning_rate": 7.693818556823784e-06, "loss": 1.6353, "step": 24519 }, { "epoch": 0.8781134170143428, "grad_norm": 1.507523536682129, "learning_rate": 7.6893575983771e-06, "loss": 1.3616, "step": 24520 }, { "epoch": 0.878149229143911, "grad_norm": 1.8455419540405273, "learning_rate": 7.68489788185105e-06, "loss": 1.6077, "step": 24521 }, { "epoch": 0.8781850412734793, "grad_norm": 1.4134904146194458, "learning_rate": 7.680439407305629e-06, "loss": 1.493, "step": 24522 }, { "epoch": 0.8782208534030476, "grad_norm": 1.6321110725402832, "learning_rate": 7.675982174800788e-06, "loss": 1.4207, "step": 24523 }, { "epoch": 0.8782566655326159, "grad_norm": 1.6989505290985107, "learning_rate": 7.671526184396527e-06, "loss": 1.5392, "step": 24524 }, { "epoch": 0.8782924776621842, "grad_norm": 1.6748425960540771, "learning_rate": 7.667071436152784e-06, "loss": 1.3084, "step": 24525 }, { "epoch": 0.8783282897917525, "grad_norm": 1.3120161294937134, "learning_rate": 7.662617930129502e-06, "loss": 1.4486, "step": 24526 }, { "epoch": 0.8783641019213208, "grad_norm": 1.5598652362823486, "learning_rate": 7.658165666386585e-06, "loss": 1.503, "step": 24527 }, { "epoch": 0.878399914050889, "grad_norm": 1.6046355962753296, "learning_rate": 7.653714644983923e-06, "loss": 1.3375, "step": 24528 }, { "epoch": 0.8784357261804573, "grad_norm": 1.784735918045044, "learning_rate": 7.649264865981443e-06, "loss": 1.6171, "step": 24529 }, { "epoch": 0.8784715383100256, "grad_norm": 1.5161055326461792, "learning_rate": 7.644816329438952e-06, "loss": 1.4565, "step": 24530 }, { "epoch": 0.8785073504395939, "grad_norm": 1.4199373722076416, "learning_rate": 7.640369035416339e-06, "loss": 1.6551, "step": 24531 }, { "epoch": 0.8785431625691622, "grad_norm": 1.6012083292007446, "learning_rate": 7.63592298397342e-06, "loss": 1.2885, "step": 24532 }, { "epoch": 0.8785789746987305, "grad_norm": 1.571999430656433, "learning_rate": 7.631478175170026e-06, "loss": 1.4313, "step": 24533 }, { "epoch": 0.8786147868282987, "grad_norm": 1.8361530303955078, "learning_rate": 7.627034609065942e-06, "loss": 1.4186, "step": 24534 }, { "epoch": 0.878650598957867, "grad_norm": 1.6053346395492554, "learning_rate": 7.622592285720942e-06, "loss": 1.3284, "step": 24535 }, { "epoch": 0.8786864110874353, "grad_norm": 2.1718499660491943, "learning_rate": 7.618151205194813e-06, "loss": 1.2265, "step": 24536 }, { "epoch": 0.8787222232170036, "grad_norm": 1.7951548099517822, "learning_rate": 7.613711367547316e-06, "loss": 1.5307, "step": 24537 }, { "epoch": 0.8787580353465719, "grad_norm": 2.2277722358703613, "learning_rate": 7.609272772838138e-06, "loss": 1.8082, "step": 24538 }, { "epoch": 0.8787938474761402, "grad_norm": 1.3839033842086792, "learning_rate": 7.604835421127021e-06, "loss": 1.268, "step": 24539 }, { "epoch": 0.8788296596057085, "grad_norm": 1.9207768440246582, "learning_rate": 7.600399312473683e-06, "loss": 1.7225, "step": 24540 }, { "epoch": 0.8788654717352767, "grad_norm": 1.5807201862335205, "learning_rate": 7.595964446937764e-06, "loss": 1.6244, "step": 24541 }, { "epoch": 0.878901283864845, "grad_norm": 1.7699261903762817, "learning_rate": 7.591530824578952e-06, "loss": 1.6804, "step": 24542 }, { "epoch": 0.8789370959944133, "grad_norm": 1.3223094940185547, "learning_rate": 7.587098445456897e-06, "loss": 1.3716, "step": 24543 }, { "epoch": 0.8789729081239815, "grad_norm": 1.6015185117721558, "learning_rate": 7.582667309631242e-06, "loss": 1.3739, "step": 24544 }, { "epoch": 0.8790087202535499, "grad_norm": 1.5923868417739868, "learning_rate": 7.578237417161571e-06, "loss": 1.4783, "step": 24545 }, { "epoch": 0.8790445323831182, "grad_norm": 2.426682710647583, "learning_rate": 7.573808768107504e-06, "loss": 1.4759, "step": 24546 }, { "epoch": 0.8790803445126865, "grad_norm": 1.7707291841506958, "learning_rate": 7.569381362528638e-06, "loss": 1.2952, "step": 24547 }, { "epoch": 0.8791161566422547, "grad_norm": 1.452986240386963, "learning_rate": 7.5649552004844915e-06, "loss": 1.4157, "step": 24548 }, { "epoch": 0.879151968771823, "grad_norm": 1.917758822441101, "learning_rate": 7.560530282034662e-06, "loss": 1.3931, "step": 24549 }, { "epoch": 0.8791877809013913, "grad_norm": 1.3515937328338623, "learning_rate": 7.556106607238633e-06, "loss": 1.4283, "step": 24550 }, { "epoch": 0.8792235930309595, "grad_norm": 1.6925004720687866, "learning_rate": 7.551684176155971e-06, "loss": 1.3824, "step": 24551 }, { "epoch": 0.8792594051605279, "grad_norm": 1.9788447618484497, "learning_rate": 7.547262988846126e-06, "loss": 1.2657, "step": 24552 }, { "epoch": 0.8792952172900962, "grad_norm": 1.8088815212249756, "learning_rate": 7.542843045368609e-06, "loss": 1.6715, "step": 24553 }, { "epoch": 0.8793310294196645, "grad_norm": 1.4498738050460815, "learning_rate": 7.538424345782902e-06, "loss": 1.3569, "step": 24554 }, { "epoch": 0.8793668415492327, "grad_norm": 1.5786463022232056, "learning_rate": 7.534006890148404e-06, "loss": 1.2862, "step": 24555 }, { "epoch": 0.879402653678801, "grad_norm": 1.7088158130645752, "learning_rate": 7.52959067852459e-06, "loss": 1.4563, "step": 24556 }, { "epoch": 0.8794384658083693, "grad_norm": 1.9697120189666748, "learning_rate": 7.525175710970811e-06, "loss": 1.4709, "step": 24557 }, { "epoch": 0.8794742779379375, "grad_norm": 1.5277745723724365, "learning_rate": 7.520761987546554e-06, "loss": 1.2802, "step": 24558 }, { "epoch": 0.8795100900675059, "grad_norm": 2.2481420040130615, "learning_rate": 7.516349508311138e-06, "loss": 1.3251, "step": 24559 }, { "epoch": 0.8795459021970742, "grad_norm": 1.6836209297180176, "learning_rate": 7.51193827332396e-06, "loss": 1.5707, "step": 24560 }, { "epoch": 0.8795817143266424, "grad_norm": 1.6443015336990356, "learning_rate": 7.507528282644316e-06, "loss": 1.592, "step": 24561 }, { "epoch": 0.8796175264562107, "grad_norm": 1.5825704336166382, "learning_rate": 7.503119536331604e-06, "loss": 1.4901, "step": 24562 }, { "epoch": 0.879653338585779, "grad_norm": 1.8732868432998657, "learning_rate": 7.49871203444511e-06, "loss": 1.2958, "step": 24563 }, { "epoch": 0.8796891507153473, "grad_norm": 1.4926637411117554, "learning_rate": 7.494305777044086e-06, "loss": 1.3454, "step": 24564 }, { "epoch": 0.8797249628449155, "grad_norm": 1.602117896080017, "learning_rate": 7.489900764187896e-06, "loss": 1.6786, "step": 24565 }, { "epoch": 0.8797607749744839, "grad_norm": 2.2812118530273438, "learning_rate": 7.485496995935748e-06, "loss": 1.5049, "step": 24566 }, { "epoch": 0.8797965871040522, "grad_norm": 1.250343918800354, "learning_rate": 7.481094472346905e-06, "loss": 1.3796, "step": 24567 }, { "epoch": 0.8798323992336204, "grad_norm": 1.6296786069869995, "learning_rate": 7.476693193480577e-06, "loss": 1.5116, "step": 24568 }, { "epoch": 0.8798682113631887, "grad_norm": 1.5637654066085815, "learning_rate": 7.472293159396027e-06, "loss": 1.5897, "step": 24569 }, { "epoch": 0.879904023492757, "grad_norm": 1.8617569208145142, "learning_rate": 7.4678943701523954e-06, "loss": 1.4735, "step": 24570 }, { "epoch": 0.8799398356223253, "grad_norm": 2.2335526943206787, "learning_rate": 7.4634968258089135e-06, "loss": 1.5358, "step": 24571 }, { "epoch": 0.8799756477518935, "grad_norm": 1.9554426670074463, "learning_rate": 7.4591005264246895e-06, "loss": 1.4661, "step": 24572 }, { "epoch": 0.8800114598814619, "grad_norm": 2.1411080360412598, "learning_rate": 7.454705472058909e-06, "loss": 1.3836, "step": 24573 }, { "epoch": 0.8800472720110302, "grad_norm": 2.0509655475616455, "learning_rate": 7.450311662770704e-06, "loss": 1.215, "step": 24574 }, { "epoch": 0.8800830841405984, "grad_norm": 1.5214743614196777, "learning_rate": 7.445919098619159e-06, "loss": 1.2725, "step": 24575 }, { "epoch": 0.8801188962701667, "grad_norm": 1.6003098487854004, "learning_rate": 7.441527779663382e-06, "loss": 1.5988, "step": 24576 }, { "epoch": 0.880154708399735, "grad_norm": 1.7950252294540405, "learning_rate": 7.43713770596246e-06, "loss": 1.3825, "step": 24577 }, { "epoch": 0.8801905205293032, "grad_norm": 1.5984803438186646, "learning_rate": 7.4327488775754794e-06, "loss": 1.3248, "step": 24578 }, { "epoch": 0.8802263326588715, "grad_norm": 1.6045230627059937, "learning_rate": 7.428361294561415e-06, "loss": 1.2454, "step": 24579 }, { "epoch": 0.8802621447884399, "grad_norm": 2.036458969116211, "learning_rate": 7.423974956979374e-06, "loss": 1.4281, "step": 24580 }, { "epoch": 0.8802979569180082, "grad_norm": 1.4843889474868774, "learning_rate": 7.419589864888332e-06, "loss": 1.4315, "step": 24581 }, { "epoch": 0.8803337690475764, "grad_norm": 1.5990513563156128, "learning_rate": 7.415206018347287e-06, "loss": 1.1032, "step": 24582 }, { "epoch": 0.8803695811771447, "grad_norm": 1.3269641399383545, "learning_rate": 7.410823417415203e-06, "loss": 1.2407, "step": 24583 }, { "epoch": 0.880405393306713, "grad_norm": 1.8271068334579468, "learning_rate": 7.406442062151064e-06, "loss": 1.3426, "step": 24584 }, { "epoch": 0.8804412054362812, "grad_norm": 2.1693825721740723, "learning_rate": 7.402061952613826e-06, "loss": 1.126, "step": 24585 }, { "epoch": 0.8804770175658495, "grad_norm": 1.887425422668457, "learning_rate": 7.397683088862395e-06, "loss": 1.5462, "step": 24586 }, { "epoch": 0.8805128296954179, "grad_norm": 1.5298993587493896, "learning_rate": 7.393305470955681e-06, "loss": 1.1619, "step": 24587 }, { "epoch": 0.8805486418249862, "grad_norm": 2.064223051071167, "learning_rate": 7.388929098952579e-06, "loss": 1.2954, "step": 24588 }, { "epoch": 0.8805844539545544, "grad_norm": 1.1978247165679932, "learning_rate": 7.384553972912011e-06, "loss": 1.537, "step": 24589 }, { "epoch": 0.8806202660841227, "grad_norm": 1.5021591186523438, "learning_rate": 7.380180092892775e-06, "loss": 1.4642, "step": 24590 }, { "epoch": 0.880656078213691, "grad_norm": 2.6436407566070557, "learning_rate": 7.375807458953743e-06, "loss": 1.812, "step": 24591 }, { "epoch": 0.8806918903432592, "grad_norm": 1.5209324359893799, "learning_rate": 7.37143607115377e-06, "loss": 1.3444, "step": 24592 }, { "epoch": 0.8807277024728275, "grad_norm": 1.360662579536438, "learning_rate": 7.36706592955162e-06, "loss": 1.3151, "step": 24593 }, { "epoch": 0.8807635146023959, "grad_norm": 1.9590721130371094, "learning_rate": 7.362697034206112e-06, "loss": 1.177, "step": 24594 }, { "epoch": 0.8807993267319641, "grad_norm": 1.7141327857971191, "learning_rate": 7.358329385176033e-06, "loss": 1.4844, "step": 24595 }, { "epoch": 0.8808351388615324, "grad_norm": 1.7014778852462769, "learning_rate": 7.353962982520135e-06, "loss": 1.5032, "step": 24596 }, { "epoch": 0.8808709509911007, "grad_norm": 1.8111387491226196, "learning_rate": 7.34959782629715e-06, "loss": 1.264, "step": 24597 }, { "epoch": 0.880906763120669, "grad_norm": 1.8225723505020142, "learning_rate": 7.345233916565808e-06, "loss": 1.4492, "step": 24598 }, { "epoch": 0.8809425752502372, "grad_norm": 1.3599964380264282, "learning_rate": 7.340871253384851e-06, "loss": 1.4305, "step": 24599 }, { "epoch": 0.8809783873798055, "grad_norm": 1.8249688148498535, "learning_rate": 7.336509836812933e-06, "loss": 1.1971, "step": 24600 }, { "epoch": 0.8810141995093739, "grad_norm": 1.7372807264328003, "learning_rate": 7.3321496669087495e-06, "loss": 1.4024, "step": 24601 }, { "epoch": 0.8810500116389421, "grad_norm": 1.6618596315383911, "learning_rate": 7.327790743730956e-06, "loss": 1.3195, "step": 24602 }, { "epoch": 0.8810858237685104, "grad_norm": 1.5446751117706299, "learning_rate": 7.323433067338214e-06, "loss": 1.426, "step": 24603 }, { "epoch": 0.8811216358980787, "grad_norm": 1.6365025043487549, "learning_rate": 7.319076637789124e-06, "loss": 1.1681, "step": 24604 }, { "epoch": 0.881157448027647, "grad_norm": 1.3218605518341064, "learning_rate": 7.314721455142304e-06, "loss": 1.3863, "step": 24605 }, { "epoch": 0.8811932601572152, "grad_norm": 1.4129111766815186, "learning_rate": 7.310367519456352e-06, "loss": 1.5447, "step": 24606 }, { "epoch": 0.8812290722867835, "grad_norm": 2.340679407119751, "learning_rate": 7.306014830789865e-06, "loss": 1.4607, "step": 24607 }, { "epoch": 0.8812648844163519, "grad_norm": 2.0600128173828125, "learning_rate": 7.3016633892013634e-06, "loss": 1.5905, "step": 24608 }, { "epoch": 0.8813006965459201, "grad_norm": 1.7693760395050049, "learning_rate": 7.2973131947494e-06, "loss": 1.3652, "step": 24609 }, { "epoch": 0.8813365086754884, "grad_norm": 1.6240615844726562, "learning_rate": 7.292964247492539e-06, "loss": 1.3207, "step": 24610 }, { "epoch": 0.8813723208050567, "grad_norm": 1.4567763805389404, "learning_rate": 7.288616547489235e-06, "loss": 1.3986, "step": 24611 }, { "epoch": 0.8814081329346249, "grad_norm": 1.3678109645843506, "learning_rate": 7.284270094798018e-06, "loss": 1.2433, "step": 24612 }, { "epoch": 0.8814439450641932, "grad_norm": 1.4017517566680908, "learning_rate": 7.279924889477341e-06, "loss": 1.4941, "step": 24613 }, { "epoch": 0.8814797571937615, "grad_norm": 2.248542308807373, "learning_rate": 7.27558093158569e-06, "loss": 1.5972, "step": 24614 }, { "epoch": 0.8815155693233299, "grad_norm": 1.4761277437210083, "learning_rate": 7.2712382211814865e-06, "loss": 1.4937, "step": 24615 }, { "epoch": 0.8815513814528981, "grad_norm": 1.3250051736831665, "learning_rate": 7.266896758323149e-06, "loss": 1.1396, "step": 24616 }, { "epoch": 0.8815871935824664, "grad_norm": 2.3315348625183105, "learning_rate": 7.2625565430691214e-06, "loss": 1.128, "step": 24617 }, { "epoch": 0.8816230057120347, "grad_norm": 1.9104201793670654, "learning_rate": 7.258217575477755e-06, "loss": 1.364, "step": 24618 }, { "epoch": 0.8816588178416029, "grad_norm": 1.6324379444122314, "learning_rate": 7.253879855607437e-06, "loss": 1.4386, "step": 24619 }, { "epoch": 0.8816946299711712, "grad_norm": 1.622747540473938, "learning_rate": 7.249543383516544e-06, "loss": 1.3758, "step": 24620 }, { "epoch": 0.8817304421007395, "grad_norm": 1.622939109802246, "learning_rate": 7.245208159263417e-06, "loss": 1.5202, "step": 24621 }, { "epoch": 0.8817662542303079, "grad_norm": 1.5521178245544434, "learning_rate": 7.240874182906343e-06, "loss": 1.3341, "step": 24622 }, { "epoch": 0.8818020663598761, "grad_norm": 1.466652512550354, "learning_rate": 7.236541454503664e-06, "loss": 1.5546, "step": 24623 }, { "epoch": 0.8818378784894444, "grad_norm": 1.4532678127288818, "learning_rate": 7.232209974113668e-06, "loss": 1.3809, "step": 24624 }, { "epoch": 0.8818736906190127, "grad_norm": 2.0897300243377686, "learning_rate": 7.2278797417946405e-06, "loss": 1.6313, "step": 24625 }, { "epoch": 0.8819095027485809, "grad_norm": 1.290476679801941, "learning_rate": 7.2235507576048024e-06, "loss": 0.8929, "step": 24626 }, { "epoch": 0.8819453148781492, "grad_norm": 1.5610119104385376, "learning_rate": 7.219223021602417e-06, "loss": 1.1235, "step": 24627 }, { "epoch": 0.8819811270077175, "grad_norm": 1.5145996809005737, "learning_rate": 7.214896533845716e-06, "loss": 1.5866, "step": 24628 }, { "epoch": 0.8820169391372858, "grad_norm": 1.9009678363800049, "learning_rate": 7.210571294392898e-06, "loss": 1.5812, "step": 24629 }, { "epoch": 0.8820527512668541, "grad_norm": 1.3867923021316528, "learning_rate": 7.206247303302138e-06, "loss": 1.3757, "step": 24630 }, { "epoch": 0.8820885633964224, "grad_norm": 2.2626278400421143, "learning_rate": 7.201924560631634e-06, "loss": 1.6227, "step": 24631 }, { "epoch": 0.8821243755259907, "grad_norm": 1.3764382600784302, "learning_rate": 7.197603066439551e-06, "loss": 1.1527, "step": 24632 }, { "epoch": 0.8821601876555589, "grad_norm": 1.6069098711013794, "learning_rate": 7.193282820783987e-06, "loss": 1.3079, "step": 24633 }, { "epoch": 0.8821959997851272, "grad_norm": 1.8032952547073364, "learning_rate": 7.188963823723105e-06, "loss": 1.4146, "step": 24634 }, { "epoch": 0.8822318119146955, "grad_norm": 1.4079662561416626, "learning_rate": 7.184646075315005e-06, "loss": 1.1088, "step": 24635 }, { "epoch": 0.8822676240442638, "grad_norm": 1.5412856340408325, "learning_rate": 7.18032957561775e-06, "loss": 1.5314, "step": 24636 }, { "epoch": 0.8823034361738321, "grad_norm": 1.5019010305404663, "learning_rate": 7.176014324689428e-06, "loss": 1.5056, "step": 24637 }, { "epoch": 0.8823392483034004, "grad_norm": 1.4662113189697266, "learning_rate": 7.171700322588115e-06, "loss": 1.3196, "step": 24638 }, { "epoch": 0.8823750604329686, "grad_norm": 1.510379433631897, "learning_rate": 7.167387569371842e-06, "loss": 1.2243, "step": 24639 }, { "epoch": 0.8824108725625369, "grad_norm": 1.4555983543395996, "learning_rate": 7.1630760650986065e-06, "loss": 1.353, "step": 24640 }, { "epoch": 0.8824466846921052, "grad_norm": 1.6988203525543213, "learning_rate": 7.158765809826429e-06, "loss": 1.5011, "step": 24641 }, { "epoch": 0.8824824968216735, "grad_norm": 1.522375464439392, "learning_rate": 7.154456803613297e-06, "loss": 1.2669, "step": 24642 }, { "epoch": 0.8825183089512418, "grad_norm": 1.9415677785873413, "learning_rate": 7.150149046517218e-06, "loss": 1.7027, "step": 24643 }, { "epoch": 0.8825541210808101, "grad_norm": 1.302003264427185, "learning_rate": 7.145842538596104e-06, "loss": 1.3639, "step": 24644 }, { "epoch": 0.8825899332103784, "grad_norm": 1.48600435256958, "learning_rate": 7.141537279907873e-06, "loss": 1.3566, "step": 24645 }, { "epoch": 0.8826257453399466, "grad_norm": 1.4763420820236206, "learning_rate": 7.1372332705105125e-06, "loss": 1.1619, "step": 24646 }, { "epoch": 0.8826615574695149, "grad_norm": 1.6381981372833252, "learning_rate": 7.132930510461889e-06, "loss": 1.5885, "step": 24647 }, { "epoch": 0.8826973695990832, "grad_norm": 1.9131838083267212, "learning_rate": 7.128628999819886e-06, "loss": 1.5576, "step": 24648 }, { "epoch": 0.8827331817286515, "grad_norm": 2.0828161239624023, "learning_rate": 7.1243287386423826e-06, "loss": 1.5515, "step": 24649 }, { "epoch": 0.8827689938582198, "grad_norm": 1.6835256814956665, "learning_rate": 7.120029726987254e-06, "loss": 1.5039, "step": 24650 }, { "epoch": 0.8828048059877881, "grad_norm": 1.6714760065078735, "learning_rate": 7.11573196491232e-06, "loss": 1.5961, "step": 24651 }, { "epoch": 0.8828406181173564, "grad_norm": 1.5934548377990723, "learning_rate": 7.111435452475368e-06, "loss": 1.5434, "step": 24652 }, { "epoch": 0.8828764302469246, "grad_norm": 1.663367509841919, "learning_rate": 7.1071401897342625e-06, "loss": 1.3505, "step": 24653 }, { "epoch": 0.8829122423764929, "grad_norm": 1.8386807441711426, "learning_rate": 7.1028461767467466e-06, "loss": 1.7194, "step": 24654 }, { "epoch": 0.8829480545060612, "grad_norm": 1.7250741720199585, "learning_rate": 7.0985534135706296e-06, "loss": 1.4797, "step": 24655 }, { "epoch": 0.8829838666356294, "grad_norm": 1.8594763278961182, "learning_rate": 7.0942619002635995e-06, "loss": 1.5841, "step": 24656 }, { "epoch": 0.8830196787651978, "grad_norm": 1.341265320777893, "learning_rate": 7.089971636883475e-06, "loss": 1.3489, "step": 24657 }, { "epoch": 0.8830554908947661, "grad_norm": 1.5680384635925293, "learning_rate": 7.085682623487921e-06, "loss": 1.3567, "step": 24658 }, { "epoch": 0.8830913030243344, "grad_norm": 1.4889854192733765, "learning_rate": 7.0813948601346715e-06, "loss": 1.5167, "step": 24659 }, { "epoch": 0.8831271151539026, "grad_norm": 2.0524439811706543, "learning_rate": 7.077108346881378e-06, "loss": 1.2639, "step": 24660 }, { "epoch": 0.8831629272834709, "grad_norm": 1.2576946020126343, "learning_rate": 7.07282308378574e-06, "loss": 1.4163, "step": 24661 }, { "epoch": 0.8831987394130392, "grad_norm": 1.7365771532058716, "learning_rate": 7.068539070905411e-06, "loss": 1.8081, "step": 24662 }, { "epoch": 0.8832345515426074, "grad_norm": 1.9017252922058105, "learning_rate": 7.064256308297978e-06, "loss": 1.7369, "step": 24663 }, { "epoch": 0.8832703636721758, "grad_norm": 1.9138576984405518, "learning_rate": 7.05997479602114e-06, "loss": 1.2112, "step": 24664 }, { "epoch": 0.8833061758017441, "grad_norm": 1.3498430252075195, "learning_rate": 7.0556945341324284e-06, "loss": 1.4391, "step": 24665 }, { "epoch": 0.8833419879313124, "grad_norm": 1.7585177421569824, "learning_rate": 7.051415522689487e-06, "loss": 1.632, "step": 24666 }, { "epoch": 0.8833778000608806, "grad_norm": 1.7154725790023804, "learning_rate": 7.047137761749811e-06, "loss": 1.4024, "step": 24667 }, { "epoch": 0.8834136121904489, "grad_norm": 1.4533997774124146, "learning_rate": 7.042861251371036e-06, "loss": 1.1525, "step": 24668 }, { "epoch": 0.8834494243200172, "grad_norm": 1.5111123323440552, "learning_rate": 7.038585991610647e-06, "loss": 1.3345, "step": 24669 }, { "epoch": 0.8834852364495854, "grad_norm": 2.691683769226074, "learning_rate": 7.034311982526165e-06, "loss": 1.4586, "step": 24670 }, { "epoch": 0.8835210485791538, "grad_norm": 2.8711907863616943, "learning_rate": 7.0300392241751e-06, "loss": 1.7666, "step": 24671 }, { "epoch": 0.8835568607087221, "grad_norm": 1.3507050275802612, "learning_rate": 7.025767716614928e-06, "loss": 1.2691, "step": 24672 }, { "epoch": 0.8835926728382903, "grad_norm": 1.9385850429534912, "learning_rate": 7.021497459903137e-06, "loss": 1.6612, "step": 24673 }, { "epoch": 0.8836284849678586, "grad_norm": 2.0036303997039795, "learning_rate": 7.017228454097136e-06, "loss": 1.4667, "step": 24674 }, { "epoch": 0.8836642970974269, "grad_norm": 1.8919461965560913, "learning_rate": 7.012960699254423e-06, "loss": 1.2746, "step": 24675 }, { "epoch": 0.8837001092269952, "grad_norm": 1.8504756689071655, "learning_rate": 7.0086941954323634e-06, "loss": 1.3695, "step": 24676 }, { "epoch": 0.8837359213565634, "grad_norm": 1.6944911479949951, "learning_rate": 7.004428942688379e-06, "loss": 1.3329, "step": 24677 }, { "epoch": 0.8837717334861318, "grad_norm": 1.8815547227859497, "learning_rate": 7.000164941079846e-06, "loss": 1.3605, "step": 24678 }, { "epoch": 0.8838075456157001, "grad_norm": 1.4075126647949219, "learning_rate": 6.995902190664116e-06, "loss": 1.439, "step": 24679 }, { "epoch": 0.8838433577452683, "grad_norm": 1.4690585136413574, "learning_rate": 6.99164069149858e-06, "loss": 1.342, "step": 24680 }, { "epoch": 0.8838791698748366, "grad_norm": 1.5376348495483398, "learning_rate": 6.9873804436405345e-06, "loss": 1.4158, "step": 24681 }, { "epoch": 0.8839149820044049, "grad_norm": 1.4911822080612183, "learning_rate": 6.9831214471473e-06, "loss": 1.1987, "step": 24682 }, { "epoch": 0.8839507941339732, "grad_norm": 1.6924875974655151, "learning_rate": 6.978863702076188e-06, "loss": 1.4808, "step": 24683 }, { "epoch": 0.8839866062635414, "grad_norm": 1.6052711009979248, "learning_rate": 6.974607208484496e-06, "loss": 1.5073, "step": 24684 }, { "epoch": 0.8840224183931098, "grad_norm": 1.507482886314392, "learning_rate": 6.970351966429445e-06, "loss": 1.3385, "step": 24685 }, { "epoch": 0.8840582305226781, "grad_norm": 1.743933081626892, "learning_rate": 6.966097975968311e-06, "loss": 1.159, "step": 24686 }, { "epoch": 0.8840940426522463, "grad_norm": 1.7662138938903809, "learning_rate": 6.961845237158337e-06, "loss": 1.4932, "step": 24687 }, { "epoch": 0.8841298547818146, "grad_norm": 1.4913418292999268, "learning_rate": 6.957593750056712e-06, "loss": 1.1511, "step": 24688 }, { "epoch": 0.8841656669113829, "grad_norm": 1.4417868852615356, "learning_rate": 6.953343514720656e-06, "loss": 1.5253, "step": 24689 }, { "epoch": 0.8842014790409511, "grad_norm": 1.7345951795578003, "learning_rate": 6.949094531207334e-06, "loss": 1.3248, "step": 24690 }, { "epoch": 0.8842372911705194, "grad_norm": 1.930124044418335, "learning_rate": 6.944846799573934e-06, "loss": 1.337, "step": 24691 }, { "epoch": 0.8842731033000878, "grad_norm": 2.2706034183502197, "learning_rate": 6.940600319877566e-06, "loss": 1.4853, "step": 24692 }, { "epoch": 0.8843089154296561, "grad_norm": 1.682254672050476, "learning_rate": 6.936355092175384e-06, "loss": 1.4649, "step": 24693 }, { "epoch": 0.8843447275592243, "grad_norm": 1.652011752128601, "learning_rate": 6.932111116524509e-06, "loss": 1.3665, "step": 24694 }, { "epoch": 0.8843805396887926, "grad_norm": 1.7562085390090942, "learning_rate": 6.92786839298204e-06, "loss": 1.4918, "step": 24695 }, { "epoch": 0.8844163518183609, "grad_norm": 1.5120139122009277, "learning_rate": 6.923626921605031e-06, "loss": 1.2655, "step": 24696 }, { "epoch": 0.8844521639479291, "grad_norm": 1.5884032249450684, "learning_rate": 6.9193867024505695e-06, "loss": 1.5416, "step": 24697 }, { "epoch": 0.8844879760774974, "grad_norm": 1.2141194343566895, "learning_rate": 6.9151477355757e-06, "loss": 1.3824, "step": 24698 }, { "epoch": 0.8845237882070658, "grad_norm": 1.8356338739395142, "learning_rate": 6.910910021037431e-06, "loss": 1.3849, "step": 24699 }, { "epoch": 0.884559600336634, "grad_norm": 1.3843994140625, "learning_rate": 6.906673558892807e-06, "loss": 1.4973, "step": 24700 }, { "epoch": 0.8845954124662023, "grad_norm": 1.4231609106063843, "learning_rate": 6.902438349198792e-06, "loss": 1.7378, "step": 24701 }, { "epoch": 0.8846312245957706, "grad_norm": 1.7714993953704834, "learning_rate": 6.898204392012408e-06, "loss": 1.7244, "step": 24702 }, { "epoch": 0.8846670367253389, "grad_norm": 1.7537744045257568, "learning_rate": 6.893971687390566e-06, "loss": 1.4904, "step": 24703 }, { "epoch": 0.8847028488549071, "grad_norm": 1.8601810932159424, "learning_rate": 6.889740235390241e-06, "loss": 1.3624, "step": 24704 }, { "epoch": 0.8847386609844754, "grad_norm": 1.8371527194976807, "learning_rate": 6.885510036068377e-06, "loss": 1.6462, "step": 24705 }, { "epoch": 0.8847744731140438, "grad_norm": 2.1824593544006348, "learning_rate": 6.881281089481839e-06, "loss": 1.3354, "step": 24706 }, { "epoch": 0.884810285243612, "grad_norm": 1.5834816694259644, "learning_rate": 6.877053395687561e-06, "loss": 1.5739, "step": 24707 }, { "epoch": 0.8848460973731803, "grad_norm": 1.9149326086044312, "learning_rate": 6.872826954742406e-06, "loss": 1.5595, "step": 24708 }, { "epoch": 0.8848819095027486, "grad_norm": 2.170591354370117, "learning_rate": 6.868601766703253e-06, "loss": 1.2154, "step": 24709 }, { "epoch": 0.8849177216323169, "grad_norm": 1.5158920288085938, "learning_rate": 6.8643778316269226e-06, "loss": 1.2864, "step": 24710 }, { "epoch": 0.8849535337618851, "grad_norm": 1.6316123008728027, "learning_rate": 6.860155149570246e-06, "loss": 1.5006, "step": 24711 }, { "epoch": 0.8849893458914534, "grad_norm": 1.3980909585952759, "learning_rate": 6.855933720590047e-06, "loss": 1.4343, "step": 24712 }, { "epoch": 0.8850251580210218, "grad_norm": 1.5064276456832886, "learning_rate": 6.8517135447431215e-06, "loss": 1.3443, "step": 24713 }, { "epoch": 0.88506097015059, "grad_norm": 1.580866813659668, "learning_rate": 6.847494622086226e-06, "loss": 1.3168, "step": 24714 }, { "epoch": 0.8850967822801583, "grad_norm": 1.6514077186584473, "learning_rate": 6.843276952676125e-06, "loss": 1.3983, "step": 24715 }, { "epoch": 0.8851325944097266, "grad_norm": 1.7922331094741821, "learning_rate": 6.839060536569597e-06, "loss": 1.4731, "step": 24716 }, { "epoch": 0.8851684065392948, "grad_norm": 1.8965702056884766, "learning_rate": 6.834845373823317e-06, "loss": 1.2081, "step": 24717 }, { "epoch": 0.8852042186688631, "grad_norm": 1.391337275505066, "learning_rate": 6.830631464494019e-06, "loss": 1.4714, "step": 24718 }, { "epoch": 0.8852400307984314, "grad_norm": 1.5723334550857544, "learning_rate": 6.826418808638391e-06, "loss": 1.647, "step": 24719 }, { "epoch": 0.8852758429279998, "grad_norm": 1.616264820098877, "learning_rate": 6.82220740631313e-06, "loss": 1.3395, "step": 24720 }, { "epoch": 0.885311655057568, "grad_norm": 1.7802958488464355, "learning_rate": 6.8179972575748706e-06, "loss": 1.3974, "step": 24721 }, { "epoch": 0.8853474671871363, "grad_norm": 1.9190346002578735, "learning_rate": 6.813788362480256e-06, "loss": 1.4053, "step": 24722 }, { "epoch": 0.8853832793167046, "grad_norm": 1.4406262636184692, "learning_rate": 6.809580721085929e-06, "loss": 1.5542, "step": 24723 }, { "epoch": 0.8854190914462728, "grad_norm": 1.6550371646881104, "learning_rate": 6.805374333448478e-06, "loss": 1.1518, "step": 24724 }, { "epoch": 0.8854549035758411, "grad_norm": 1.6076737642288208, "learning_rate": 6.801169199624502e-06, "loss": 1.3962, "step": 24725 }, { "epoch": 0.8854907157054094, "grad_norm": 1.676985263824463, "learning_rate": 6.796965319670568e-06, "loss": 1.6268, "step": 24726 }, { "epoch": 0.8855265278349778, "grad_norm": 2.0720744132995605, "learning_rate": 6.792762693643262e-06, "loss": 1.3802, "step": 24727 }, { "epoch": 0.885562339964546, "grad_norm": 1.511457085609436, "learning_rate": 6.7885613215990965e-06, "loss": 1.3425, "step": 24728 }, { "epoch": 0.8855981520941143, "grad_norm": 2.5622329711914062, "learning_rate": 6.7843612035945915e-06, "loss": 1.6844, "step": 24729 }, { "epoch": 0.8856339642236826, "grad_norm": 1.8360401391983032, "learning_rate": 6.78016233968628e-06, "loss": 1.3572, "step": 24730 }, { "epoch": 0.8856697763532508, "grad_norm": 1.9595917463302612, "learning_rate": 6.775964729930651e-06, "loss": 1.4014, "step": 24731 }, { "epoch": 0.8857055884828191, "grad_norm": 1.515363097190857, "learning_rate": 6.771768374384168e-06, "loss": 1.1346, "step": 24732 }, { "epoch": 0.8857414006123874, "grad_norm": 1.5928148031234741, "learning_rate": 6.767573273103245e-06, "loss": 1.3723, "step": 24733 }, { "epoch": 0.8857772127419558, "grad_norm": 1.628875970840454, "learning_rate": 6.7633794261444005e-06, "loss": 1.5718, "step": 24734 }, { "epoch": 0.885813024871524, "grad_norm": 1.8851513862609863, "learning_rate": 6.7591868335640016e-06, "loss": 1.4976, "step": 24735 }, { "epoch": 0.8858488370010923, "grad_norm": 1.4460524320602417, "learning_rate": 6.754995495418482e-06, "loss": 1.467, "step": 24736 }, { "epoch": 0.8858846491306606, "grad_norm": 2.1435530185699463, "learning_rate": 6.750805411764205e-06, "loss": 1.3132, "step": 24737 }, { "epoch": 0.8859204612602288, "grad_norm": 1.743093490600586, "learning_rate": 6.746616582657583e-06, "loss": 1.4094, "step": 24738 }, { "epoch": 0.8859562733897971, "grad_norm": 2.133668899536133, "learning_rate": 6.742429008154927e-06, "loss": 1.4657, "step": 24739 }, { "epoch": 0.8859920855193654, "grad_norm": 1.4859719276428223, "learning_rate": 6.738242688312602e-06, "loss": 1.3478, "step": 24740 }, { "epoch": 0.8860278976489337, "grad_norm": 2.430969715118408, "learning_rate": 6.734057623186929e-06, "loss": 1.3177, "step": 24741 }, { "epoch": 0.886063709778502, "grad_norm": 1.748104453086853, "learning_rate": 6.729873812834198e-06, "loss": 1.2021, "step": 24742 }, { "epoch": 0.8860995219080703, "grad_norm": 2.162109375, "learning_rate": 6.725691257310718e-06, "loss": 1.6393, "step": 24743 }, { "epoch": 0.8861353340376386, "grad_norm": 1.8088104724884033, "learning_rate": 6.721509956672711e-06, "loss": 1.4486, "step": 24744 }, { "epoch": 0.8861711461672068, "grad_norm": 1.3711811304092407, "learning_rate": 6.7173299109765e-06, "loss": 1.4542, "step": 24745 }, { "epoch": 0.8862069582967751, "grad_norm": 2.1322035789489746, "learning_rate": 6.713151120278283e-06, "loss": 1.5574, "step": 24746 }, { "epoch": 0.8862427704263434, "grad_norm": 2.123325824737549, "learning_rate": 6.7089735846342815e-06, "loss": 1.3594, "step": 24747 }, { "epoch": 0.8862785825559117, "grad_norm": 1.7514166831970215, "learning_rate": 6.704797304100707e-06, "loss": 1.2006, "step": 24748 }, { "epoch": 0.88631439468548, "grad_norm": 1.8067326545715332, "learning_rate": 6.700622278733748e-06, "loss": 1.5183, "step": 24749 }, { "epoch": 0.8863502068150483, "grad_norm": 1.8331403732299805, "learning_rate": 6.69644850858957e-06, "loss": 1.411, "step": 24750 }, { "epoch": 0.8863860189446165, "grad_norm": 1.604770541191101, "learning_rate": 6.692275993724295e-06, "loss": 1.3225, "step": 24751 }, { "epoch": 0.8864218310741848, "grad_norm": 2.0157501697540283, "learning_rate": 6.688104734194123e-06, "loss": 1.3786, "step": 24752 }, { "epoch": 0.8864576432037531, "grad_norm": 1.6937917470932007, "learning_rate": 6.683934730055119e-06, "loss": 1.3432, "step": 24753 }, { "epoch": 0.8864934553333214, "grad_norm": 1.6693098545074463, "learning_rate": 6.679765981363417e-06, "loss": 1.6571, "step": 24754 }, { "epoch": 0.8865292674628896, "grad_norm": 1.3133947849273682, "learning_rate": 6.675598488175061e-06, "loss": 1.4155, "step": 24755 }, { "epoch": 0.886565079592458, "grad_norm": 1.407610297203064, "learning_rate": 6.671432250546184e-06, "loss": 1.4084, "step": 24756 }, { "epoch": 0.8866008917220263, "grad_norm": 1.6117279529571533, "learning_rate": 6.6672672685327955e-06, "loss": 1.511, "step": 24757 }, { "epoch": 0.8866367038515945, "grad_norm": 1.8112707138061523, "learning_rate": 6.663103542190918e-06, "loss": 1.7054, "step": 24758 }, { "epoch": 0.8866725159811628, "grad_norm": 1.5144332647323608, "learning_rate": 6.658941071576597e-06, "loss": 1.3524, "step": 24759 }, { "epoch": 0.8867083281107311, "grad_norm": 1.8527796268463135, "learning_rate": 6.654779856745807e-06, "loss": 1.6036, "step": 24760 }, { "epoch": 0.8867441402402993, "grad_norm": 1.8971564769744873, "learning_rate": 6.650619897754573e-06, "loss": 1.2975, "step": 24761 }, { "epoch": 0.8867799523698676, "grad_norm": 1.2585480213165283, "learning_rate": 6.646461194658804e-06, "loss": 1.4554, "step": 24762 }, { "epoch": 0.886815764499436, "grad_norm": 1.744014024734497, "learning_rate": 6.642303747514511e-06, "loss": 1.4196, "step": 24763 }, { "epoch": 0.8868515766290043, "grad_norm": 1.7813466787338257, "learning_rate": 6.638147556377583e-06, "loss": 1.1593, "step": 24764 }, { "epoch": 0.8868873887585725, "grad_norm": 1.533890962600708, "learning_rate": 6.633992621303975e-06, "loss": 1.6984, "step": 24765 }, { "epoch": 0.8869232008881408, "grad_norm": 1.2901742458343506, "learning_rate": 6.629838942349542e-06, "loss": 1.3292, "step": 24766 }, { "epoch": 0.8869590130177091, "grad_norm": 1.23209810256958, "learning_rate": 6.625686519570184e-06, "loss": 1.5023, "step": 24767 }, { "epoch": 0.8869948251472773, "grad_norm": 1.7562414407730103, "learning_rate": 6.621535353021791e-06, "loss": 1.3996, "step": 24768 }, { "epoch": 0.8870306372768456, "grad_norm": 1.7048919200897217, "learning_rate": 6.617385442760171e-06, "loss": 1.4065, "step": 24769 }, { "epoch": 0.887066449406414, "grad_norm": 2.8547306060791016, "learning_rate": 6.61323678884117e-06, "loss": 1.6266, "step": 24770 }, { "epoch": 0.8871022615359823, "grad_norm": 1.4367671012878418, "learning_rate": 6.6090893913206106e-06, "loss": 1.5252, "step": 24771 }, { "epoch": 0.8871380736655505, "grad_norm": 1.2508554458618164, "learning_rate": 6.604943250254303e-06, "loss": 0.8535, "step": 24772 }, { "epoch": 0.8871738857951188, "grad_norm": 1.5373388528823853, "learning_rate": 6.600798365697991e-06, "loss": 1.3155, "step": 24773 }, { "epoch": 0.8872096979246871, "grad_norm": 1.792191743850708, "learning_rate": 6.596654737707486e-06, "loss": 1.4589, "step": 24774 }, { "epoch": 0.8872455100542553, "grad_norm": 2.387756109237671, "learning_rate": 6.592512366338499e-06, "loss": 1.5162, "step": 24775 }, { "epoch": 0.8872813221838236, "grad_norm": 1.9498742818832397, "learning_rate": 6.588371251646774e-06, "loss": 1.6594, "step": 24776 }, { "epoch": 0.887317134313392, "grad_norm": 1.4522777795791626, "learning_rate": 6.584231393688012e-06, "loss": 1.5712, "step": 24777 }, { "epoch": 0.8873529464429603, "grad_norm": 1.6063610315322876, "learning_rate": 6.5800927925179115e-06, "loss": 1.3801, "step": 24778 }, { "epoch": 0.8873887585725285, "grad_norm": 1.822343111038208, "learning_rate": 6.575955448192184e-06, "loss": 1.4519, "step": 24779 }, { "epoch": 0.8874245707020968, "grad_norm": 2.020034074783325, "learning_rate": 6.5718193607664516e-06, "loss": 1.1206, "step": 24780 }, { "epoch": 0.8874603828316651, "grad_norm": 2.021859884262085, "learning_rate": 6.5676845302963805e-06, "loss": 1.403, "step": 24781 }, { "epoch": 0.8874961949612333, "grad_norm": 1.5246386528015137, "learning_rate": 6.563550956837594e-06, "loss": 1.1573, "step": 24782 }, { "epoch": 0.8875320070908016, "grad_norm": 1.686280369758606, "learning_rate": 6.559418640445714e-06, "loss": 1.4514, "step": 24783 }, { "epoch": 0.88756781922037, "grad_norm": 1.4774922132492065, "learning_rate": 6.555287581176317e-06, "loss": 1.606, "step": 24784 }, { "epoch": 0.8876036313499382, "grad_norm": 1.6550049781799316, "learning_rate": 6.551157779084982e-06, "loss": 1.2363, "step": 24785 }, { "epoch": 0.8876394434795065, "grad_norm": 1.3732515573501587, "learning_rate": 6.547029234227298e-06, "loss": 1.4708, "step": 24786 }, { "epoch": 0.8876752556090748, "grad_norm": 1.4818493127822876, "learning_rate": 6.5429019466587745e-06, "loss": 1.5955, "step": 24787 }, { "epoch": 0.8877110677386431, "grad_norm": 1.542306900024414, "learning_rate": 6.5387759164349585e-06, "loss": 1.4008, "step": 24788 }, { "epoch": 0.8877468798682113, "grad_norm": 1.4538143873214722, "learning_rate": 6.5346511436113585e-06, "loss": 1.2013, "step": 24789 }, { "epoch": 0.8877826919977796, "grad_norm": 2.1333580017089844, "learning_rate": 6.5305276282434765e-06, "loss": 1.4146, "step": 24790 }, { "epoch": 0.887818504127348, "grad_norm": 1.75994873046875, "learning_rate": 6.526405370386757e-06, "loss": 1.4529, "step": 24791 }, { "epoch": 0.8878543162569162, "grad_norm": 1.6889221668243408, "learning_rate": 6.522284370096687e-06, "loss": 1.5275, "step": 24792 }, { "epoch": 0.8878901283864845, "grad_norm": 1.9283156394958496, "learning_rate": 6.518164627428724e-06, "loss": 1.5469, "step": 24793 }, { "epoch": 0.8879259405160528, "grad_norm": 1.4188671112060547, "learning_rate": 6.514046142438246e-06, "loss": 1.3232, "step": 24794 }, { "epoch": 0.887961752645621, "grad_norm": 2.01699161529541, "learning_rate": 6.509928915180697e-06, "loss": 1.5934, "step": 24795 }, { "epoch": 0.8879975647751893, "grad_norm": 1.3128671646118164, "learning_rate": 6.505812945711454e-06, "loss": 1.2986, "step": 24796 }, { "epoch": 0.8880333769047576, "grad_norm": 1.3702200651168823, "learning_rate": 6.501698234085929e-06, "loss": 1.4046, "step": 24797 }, { "epoch": 0.888069189034326, "grad_norm": 1.5460312366485596, "learning_rate": 6.497584780359423e-06, "loss": 1.3257, "step": 24798 }, { "epoch": 0.8881050011638942, "grad_norm": 1.7780214548110962, "learning_rate": 6.4934725845873016e-06, "loss": 1.5938, "step": 24799 }, { "epoch": 0.8881408132934625, "grad_norm": 1.7738583087921143, "learning_rate": 6.489361646824898e-06, "loss": 1.3187, "step": 24800 }, { "epoch": 0.8881766254230308, "grad_norm": 1.6108421087265015, "learning_rate": 6.485251967127526e-06, "loss": 1.6039, "step": 24801 }, { "epoch": 0.888212437552599, "grad_norm": 1.5561243295669556, "learning_rate": 6.48114354555045e-06, "loss": 1.3836, "step": 24802 }, { "epoch": 0.8882482496821673, "grad_norm": 1.284730076789856, "learning_rate": 6.47703638214896e-06, "loss": 1.3728, "step": 24803 }, { "epoch": 0.8882840618117356, "grad_norm": 1.3639391660690308, "learning_rate": 6.4729304769783225e-06, "loss": 1.3553, "step": 24804 }, { "epoch": 0.888319873941304, "grad_norm": 1.3871612548828125, "learning_rate": 6.468825830093739e-06, "loss": 1.626, "step": 24805 }, { "epoch": 0.8883556860708722, "grad_norm": 1.5964213609695435, "learning_rate": 6.4647224415504745e-06, "loss": 1.4741, "step": 24806 }, { "epoch": 0.8883914982004405, "grad_norm": 1.4523588418960571, "learning_rate": 6.460620311403709e-06, "loss": 1.5063, "step": 24807 }, { "epoch": 0.8884273103300088, "grad_norm": 1.8651169538497925, "learning_rate": 6.456519439708653e-06, "loss": 1.5239, "step": 24808 }, { "epoch": 0.888463122459577, "grad_norm": 1.986107349395752, "learning_rate": 6.452419826520451e-06, "loss": 1.4542, "step": 24809 }, { "epoch": 0.8884989345891453, "grad_norm": 1.5596935749053955, "learning_rate": 6.44832147189427e-06, "loss": 1.439, "step": 24810 }, { "epoch": 0.8885347467187136, "grad_norm": 1.6743347644805908, "learning_rate": 6.444224375885277e-06, "loss": 1.5254, "step": 24811 }, { "epoch": 0.888570558848282, "grad_norm": 1.4157236814498901, "learning_rate": 6.44012853854854e-06, "loss": 1.3591, "step": 24812 }, { "epoch": 0.8886063709778502, "grad_norm": 1.396809458732605, "learning_rate": 6.436033959939192e-06, "loss": 1.1897, "step": 24813 }, { "epoch": 0.8886421831074185, "grad_norm": 1.3337267637252808, "learning_rate": 6.431940640112322e-06, "loss": 1.7, "step": 24814 }, { "epoch": 0.8886779952369868, "grad_norm": 1.819524884223938, "learning_rate": 6.4278485791230195e-06, "loss": 1.5505, "step": 24815 }, { "epoch": 0.888713807366555, "grad_norm": 1.781790018081665, "learning_rate": 6.423757777026285e-06, "loss": 1.2891, "step": 24816 }, { "epoch": 0.8887496194961233, "grad_norm": 1.4497478008270264, "learning_rate": 6.419668233877197e-06, "loss": 1.448, "step": 24817 }, { "epoch": 0.8887854316256916, "grad_norm": 1.353127121925354, "learning_rate": 6.415579949730755e-06, "loss": 1.514, "step": 24818 }, { "epoch": 0.88882124375526, "grad_norm": 1.5938628911972046, "learning_rate": 6.411492924641982e-06, "loss": 1.3923, "step": 24819 }, { "epoch": 0.8888570558848282, "grad_norm": 1.3531911373138428, "learning_rate": 6.407407158665846e-06, "loss": 1.4075, "step": 24820 }, { "epoch": 0.8888928680143965, "grad_norm": 1.8812788724899292, "learning_rate": 6.403322651857313e-06, "loss": 1.4905, "step": 24821 }, { "epoch": 0.8889286801439648, "grad_norm": 1.4424673318862915, "learning_rate": 6.399239404271362e-06, "loss": 1.4742, "step": 24822 }, { "epoch": 0.888964492273533, "grad_norm": 1.4213186502456665, "learning_rate": 6.395157415962894e-06, "loss": 1.4607, "step": 24823 }, { "epoch": 0.8890003044031013, "grad_norm": 1.715586543083191, "learning_rate": 6.39107668698683e-06, "loss": 1.298, "step": 24824 }, { "epoch": 0.8890361165326696, "grad_norm": 1.4657044410705566, "learning_rate": 6.386997217398094e-06, "loss": 1.6368, "step": 24825 }, { "epoch": 0.8890719286622379, "grad_norm": 1.7812143564224243, "learning_rate": 6.382919007251575e-06, "loss": 1.4923, "step": 24826 }, { "epoch": 0.8891077407918062, "grad_norm": 2.4855334758758545, "learning_rate": 6.378842056602097e-06, "loss": 1.3953, "step": 24827 }, { "epoch": 0.8891435529213745, "grad_norm": 1.778143286705017, "learning_rate": 6.374766365504547e-06, "loss": 1.3601, "step": 24828 }, { "epoch": 0.8891793650509427, "grad_norm": 1.8928757905960083, "learning_rate": 6.370691934013761e-06, "loss": 1.29, "step": 24829 }, { "epoch": 0.889215177180511, "grad_norm": 1.5196993350982666, "learning_rate": 6.366618762184529e-06, "loss": 1.4735, "step": 24830 }, { "epoch": 0.8892509893100793, "grad_norm": 1.5085698366165161, "learning_rate": 6.36254685007166e-06, "loss": 1.7028, "step": 24831 }, { "epoch": 0.8892868014396476, "grad_norm": 1.4420183897018433, "learning_rate": 6.358476197729934e-06, "loss": 1.086, "step": 24832 }, { "epoch": 0.8893226135692159, "grad_norm": 1.4737896919250488, "learning_rate": 6.3544068052141415e-06, "loss": 1.3265, "step": 24833 }, { "epoch": 0.8893584256987842, "grad_norm": 1.763348937034607, "learning_rate": 6.3503386725790034e-06, "loss": 1.3154, "step": 24834 }, { "epoch": 0.8893942378283525, "grad_norm": 1.7766228914260864, "learning_rate": 6.346271799879244e-06, "loss": 1.1381, "step": 24835 }, { "epoch": 0.8894300499579207, "grad_norm": 1.7439228296279907, "learning_rate": 6.342206187169608e-06, "loss": 1.1344, "step": 24836 }, { "epoch": 0.889465862087489, "grad_norm": 1.729805588722229, "learning_rate": 6.338141834504785e-06, "loss": 1.3619, "step": 24837 }, { "epoch": 0.8895016742170573, "grad_norm": 2.5667622089385986, "learning_rate": 6.3340787419394535e-06, "loss": 1.2524, "step": 24838 }, { "epoch": 0.8895374863466255, "grad_norm": 1.547900676727295, "learning_rate": 6.330016909528236e-06, "loss": 1.2293, "step": 24839 }, { "epoch": 0.8895732984761939, "grad_norm": 1.8758774995803833, "learning_rate": 6.325956337325845e-06, "loss": 1.3833, "step": 24840 }, { "epoch": 0.8896091106057622, "grad_norm": 1.384765625, "learning_rate": 6.321897025386869e-06, "loss": 1.1106, "step": 24841 }, { "epoch": 0.8896449227353305, "grad_norm": 1.5886365175247192, "learning_rate": 6.317838973765944e-06, "loss": 1.6064, "step": 24842 }, { "epoch": 0.8896807348648987, "grad_norm": 1.6912497282028198, "learning_rate": 6.313782182517636e-06, "loss": 1.5548, "step": 24843 }, { "epoch": 0.889716546994467, "grad_norm": 1.452126145362854, "learning_rate": 6.309726651696557e-06, "loss": 1.4011, "step": 24844 }, { "epoch": 0.8897523591240353, "grad_norm": 1.428489089012146, "learning_rate": 6.305672381357264e-06, "loss": 1.322, "step": 24845 }, { "epoch": 0.8897881712536035, "grad_norm": 1.99895179271698, "learning_rate": 6.301619371554257e-06, "loss": 1.6756, "step": 24846 }, { "epoch": 0.8898239833831719, "grad_norm": 2.284052848815918, "learning_rate": 6.297567622342127e-06, "loss": 1.495, "step": 24847 }, { "epoch": 0.8898597955127402, "grad_norm": 1.3983558416366577, "learning_rate": 6.29351713377535e-06, "loss": 1.1678, "step": 24848 }, { "epoch": 0.8898956076423085, "grad_norm": 1.4451669454574585, "learning_rate": 6.289467905908442e-06, "loss": 1.4856, "step": 24849 }, { "epoch": 0.8899314197718767, "grad_norm": 1.845213770866394, "learning_rate": 6.285419938795833e-06, "loss": 1.5874, "step": 24850 }, { "epoch": 0.889967231901445, "grad_norm": 1.7814226150512695, "learning_rate": 6.281373232492038e-06, "loss": 1.3611, "step": 24851 }, { "epoch": 0.8900030440310133, "grad_norm": 2.072495222091675, "learning_rate": 6.2773277870514675e-06, "loss": 1.3698, "step": 24852 }, { "epoch": 0.8900388561605815, "grad_norm": 1.623978853225708, "learning_rate": 6.273283602528579e-06, "loss": 1.0922, "step": 24853 }, { "epoch": 0.8900746682901499, "grad_norm": 1.4474862813949585, "learning_rate": 6.269240678977739e-06, "loss": 1.3673, "step": 24854 }, { "epoch": 0.8901104804197182, "grad_norm": 1.9575895071029663, "learning_rate": 6.265199016453371e-06, "loss": 1.5366, "step": 24855 }, { "epoch": 0.8901462925492865, "grad_norm": 1.3855613470077515, "learning_rate": 6.261158615009843e-06, "loss": 1.3095, "step": 24856 }, { "epoch": 0.8901821046788547, "grad_norm": 1.46273934841156, "learning_rate": 6.25711947470149e-06, "loss": 1.7024, "step": 24857 }, { "epoch": 0.890217916808423, "grad_norm": 1.435225248336792, "learning_rate": 6.253081595582699e-06, "loss": 1.3673, "step": 24858 }, { "epoch": 0.8902537289379913, "grad_norm": 1.5706956386566162, "learning_rate": 6.249044977707763e-06, "loss": 1.3358, "step": 24859 }, { "epoch": 0.8902895410675595, "grad_norm": 1.7113782167434692, "learning_rate": 6.245009621131004e-06, "loss": 1.4194, "step": 24860 }, { "epoch": 0.8903253531971279, "grad_norm": 1.5109610557556152, "learning_rate": 6.2409755259066786e-06, "loss": 1.3438, "step": 24861 }, { "epoch": 0.8903611653266962, "grad_norm": 1.6783028841018677, "learning_rate": 6.23694269208912e-06, "loss": 1.2793, "step": 24862 }, { "epoch": 0.8903969774562644, "grad_norm": 2.1244373321533203, "learning_rate": 6.232911119732554e-06, "loss": 1.4257, "step": 24863 }, { "epoch": 0.8904327895858327, "grad_norm": 1.8511030673980713, "learning_rate": 6.228880808891202e-06, "loss": 1.1956, "step": 24864 }, { "epoch": 0.890468601715401, "grad_norm": 1.8468077182769775, "learning_rate": 6.224851759619299e-06, "loss": 1.4459, "step": 24865 }, { "epoch": 0.8905044138449693, "grad_norm": 1.4527554512023926, "learning_rate": 6.220823971971046e-06, "loss": 1.4473, "step": 24866 }, { "epoch": 0.8905402259745375, "grad_norm": 1.7244460582733154, "learning_rate": 6.216797446000666e-06, "loss": 1.3496, "step": 24867 }, { "epoch": 0.8905760381041059, "grad_norm": 1.394762635231018, "learning_rate": 6.212772181762283e-06, "loss": 1.3262, "step": 24868 }, { "epoch": 0.8906118502336742, "grad_norm": 1.723497748374939, "learning_rate": 6.208748179310087e-06, "loss": 1.1949, "step": 24869 }, { "epoch": 0.8906476623632424, "grad_norm": 1.4614174365997314, "learning_rate": 6.204725438698189e-06, "loss": 1.5436, "step": 24870 }, { "epoch": 0.8906834744928107, "grad_norm": 1.726501703262329, "learning_rate": 6.200703959980747e-06, "loss": 1.3518, "step": 24871 }, { "epoch": 0.890719286622379, "grad_norm": 1.5753802061080933, "learning_rate": 6.196683743211818e-06, "loss": 1.4447, "step": 24872 }, { "epoch": 0.8907550987519472, "grad_norm": 1.9597927331924438, "learning_rate": 6.192664788445513e-06, "loss": 1.5194, "step": 24873 }, { "epoch": 0.8907909108815155, "grad_norm": 1.322771668434143, "learning_rate": 6.188647095735911e-06, "loss": 1.6306, "step": 24874 }, { "epoch": 0.8908267230110839, "grad_norm": 1.6770081520080566, "learning_rate": 6.184630665137048e-06, "loss": 1.2971, "step": 24875 }, { "epoch": 0.8908625351406522, "grad_norm": 1.794987440109253, "learning_rate": 6.180615496702968e-06, "loss": 1.5026, "step": 24876 }, { "epoch": 0.8908983472702204, "grad_norm": 1.406591534614563, "learning_rate": 6.176601590487685e-06, "loss": 1.1957, "step": 24877 }, { "epoch": 0.8909341593997887, "grad_norm": 1.7108861207962036, "learning_rate": 6.17258894654521e-06, "loss": 1.3956, "step": 24878 }, { "epoch": 0.890969971529357, "grad_norm": 1.513010859489441, "learning_rate": 6.168577564929523e-06, "loss": 1.6367, "step": 24879 }, { "epoch": 0.8910057836589252, "grad_norm": 1.442939043045044, "learning_rate": 6.16456744569458e-06, "loss": 1.1821, "step": 24880 }, { "epoch": 0.8910415957884935, "grad_norm": 2.025557279586792, "learning_rate": 6.160558588894361e-06, "loss": 1.5068, "step": 24881 }, { "epoch": 0.8910774079180619, "grad_norm": 1.4389938116073608, "learning_rate": 6.156550994582766e-06, "loss": 1.0765, "step": 24882 }, { "epoch": 0.8911132200476302, "grad_norm": 1.3525745868682861, "learning_rate": 6.1525446628137306e-06, "loss": 1.2608, "step": 24883 }, { "epoch": 0.8911490321771984, "grad_norm": 1.968949794769287, "learning_rate": 6.148539593641156e-06, "loss": 1.5984, "step": 24884 }, { "epoch": 0.8911848443067667, "grad_norm": 1.5880968570709229, "learning_rate": 6.144535787118921e-06, "loss": 1.5123, "step": 24885 }, { "epoch": 0.891220656436335, "grad_norm": 2.515672206878662, "learning_rate": 6.140533243300894e-06, "loss": 1.5112, "step": 24886 }, { "epoch": 0.8912564685659032, "grad_norm": 1.2918782234191895, "learning_rate": 6.13653196224091e-06, "loss": 1.0852, "step": 24887 }, { "epoch": 0.8912922806954715, "grad_norm": 1.7701398134231567, "learning_rate": 6.132531943992826e-06, "loss": 1.1541, "step": 24888 }, { "epoch": 0.8913280928250399, "grad_norm": 1.8440961837768555, "learning_rate": 6.128533188610453e-06, "loss": 1.3715, "step": 24889 }, { "epoch": 0.8913639049546082, "grad_norm": 2.0885820388793945, "learning_rate": 6.124535696147559e-06, "loss": 1.8181, "step": 24890 }, { "epoch": 0.8913997170841764, "grad_norm": 1.7771596908569336, "learning_rate": 6.12053946665796e-06, "loss": 1.2956, "step": 24891 }, { "epoch": 0.8914355292137447, "grad_norm": 1.7697362899780273, "learning_rate": 6.1165445001954095e-06, "loss": 1.2343, "step": 24892 }, { "epoch": 0.891471341343313, "grad_norm": 2.0563573837280273, "learning_rate": 6.112550796813643e-06, "loss": 1.5779, "step": 24893 }, { "epoch": 0.8915071534728812, "grad_norm": 1.5106770992279053, "learning_rate": 6.108558356566396e-06, "loss": 1.5096, "step": 24894 }, { "epoch": 0.8915429656024495, "grad_norm": 1.5133570432662964, "learning_rate": 6.104567179507381e-06, "loss": 1.21, "step": 24895 }, { "epoch": 0.8915787777320179, "grad_norm": 1.82058584690094, "learning_rate": 6.100577265690321e-06, "loss": 1.6064, "step": 24896 }, { "epoch": 0.8916145898615861, "grad_norm": 1.4742602109909058, "learning_rate": 6.096588615168864e-06, "loss": 1.2988, "step": 24897 }, { "epoch": 0.8916504019911544, "grad_norm": 1.487149953842163, "learning_rate": 6.092601227996664e-06, "loss": 1.4094, "step": 24898 }, { "epoch": 0.8916862141207227, "grad_norm": 1.983708381652832, "learning_rate": 6.088615104227413e-06, "loss": 1.4038, "step": 24899 }, { "epoch": 0.891722026250291, "grad_norm": 1.6666429042816162, "learning_rate": 6.084630243914679e-06, "loss": 1.379, "step": 24900 }, { "epoch": 0.8917578383798592, "grad_norm": 1.616948127746582, "learning_rate": 6.080646647112109e-06, "loss": 1.3444, "step": 24901 }, { "epoch": 0.8917936505094275, "grad_norm": 1.2481621503829956, "learning_rate": 6.076664313873293e-06, "loss": 1.2012, "step": 24902 }, { "epoch": 0.8918294626389959, "grad_norm": 1.9596446752548218, "learning_rate": 6.07268324425182e-06, "loss": 1.6332, "step": 24903 }, { "epoch": 0.8918652747685641, "grad_norm": 2.1639833450317383, "learning_rate": 6.068703438301226e-06, "loss": 1.6922, "step": 24904 }, { "epoch": 0.8919010868981324, "grad_norm": 1.326301097869873, "learning_rate": 6.064724896075058e-06, "loss": 1.5365, "step": 24905 }, { "epoch": 0.8919368990277007, "grad_norm": 1.8309438228607178, "learning_rate": 6.06074761762685e-06, "loss": 1.2234, "step": 24906 }, { "epoch": 0.891972711157269, "grad_norm": 1.4915207624435425, "learning_rate": 6.056771603010125e-06, "loss": 1.4147, "step": 24907 }, { "epoch": 0.8920085232868372, "grad_norm": 1.525928020477295, "learning_rate": 6.052796852278353e-06, "loss": 1.5172, "step": 24908 }, { "epoch": 0.8920443354164055, "grad_norm": 1.383877158164978, "learning_rate": 6.048823365485012e-06, "loss": 1.2705, "step": 24909 }, { "epoch": 0.8920801475459739, "grad_norm": 1.3349719047546387, "learning_rate": 6.044851142683572e-06, "loss": 1.3585, "step": 24910 }, { "epoch": 0.8921159596755421, "grad_norm": 2.2004175186157227, "learning_rate": 6.040880183927455e-06, "loss": 1.3931, "step": 24911 }, { "epoch": 0.8921517718051104, "grad_norm": 1.7021312713623047, "learning_rate": 6.036910489270098e-06, "loss": 1.2756, "step": 24912 }, { "epoch": 0.8921875839346787, "grad_norm": 1.871277093887329, "learning_rate": 6.0329420587649124e-06, "loss": 1.5018, "step": 24913 }, { "epoch": 0.8922233960642469, "grad_norm": 2.3825483322143555, "learning_rate": 6.028974892465289e-06, "loss": 1.2814, "step": 24914 }, { "epoch": 0.8922592081938152, "grad_norm": 2.0105133056640625, "learning_rate": 6.025008990424585e-06, "loss": 1.2743, "step": 24915 }, { "epoch": 0.8922950203233835, "grad_norm": 2.2031428813934326, "learning_rate": 6.021044352696159e-06, "loss": 1.8541, "step": 24916 }, { "epoch": 0.8923308324529519, "grad_norm": 1.5223588943481445, "learning_rate": 6.017080979333378e-06, "loss": 1.491, "step": 24917 }, { "epoch": 0.8923666445825201, "grad_norm": 4.320333957672119, "learning_rate": 6.013118870389523e-06, "loss": 1.7306, "step": 24918 }, { "epoch": 0.8924024567120884, "grad_norm": 1.5868854522705078, "learning_rate": 6.009158025917927e-06, "loss": 1.2724, "step": 24919 }, { "epoch": 0.8924382688416567, "grad_norm": 1.3187925815582275, "learning_rate": 6.00519844597186e-06, "loss": 1.2816, "step": 24920 }, { "epoch": 0.8924740809712249, "grad_norm": 1.4020674228668213, "learning_rate": 6.001240130604624e-06, "loss": 1.484, "step": 24921 }, { "epoch": 0.8925098931007932, "grad_norm": 1.5411213636398315, "learning_rate": 5.997283079869442e-06, "loss": 1.261, "step": 24922 }, { "epoch": 0.8925457052303615, "grad_norm": 2.140064001083374, "learning_rate": 5.993327293819562e-06, "loss": 1.439, "step": 24923 }, { "epoch": 0.8925815173599299, "grad_norm": 1.97054922580719, "learning_rate": 5.989372772508195e-06, "loss": 1.3858, "step": 24924 }, { "epoch": 0.8926173294894981, "grad_norm": 2.1375653743743896, "learning_rate": 5.985419515988566e-06, "loss": 1.3219, "step": 24925 }, { "epoch": 0.8926531416190664, "grad_norm": 1.6442660093307495, "learning_rate": 5.981467524313855e-06, "loss": 1.1661, "step": 24926 }, { "epoch": 0.8926889537486347, "grad_norm": 1.7716608047485352, "learning_rate": 5.977516797537186e-06, "loss": 1.5978, "step": 24927 }, { "epoch": 0.8927247658782029, "grad_norm": 1.8038569688796997, "learning_rate": 5.973567335711783e-06, "loss": 1.3623, "step": 24928 }, { "epoch": 0.8927605780077712, "grad_norm": 2.1996359825134277, "learning_rate": 5.969619138890737e-06, "loss": 1.1365, "step": 24929 }, { "epoch": 0.8927963901373395, "grad_norm": 1.420465350151062, "learning_rate": 5.965672207127171e-06, "loss": 1.3239, "step": 24930 }, { "epoch": 0.8928322022669078, "grad_norm": 1.2334009408950806, "learning_rate": 5.961726540474189e-06, "loss": 1.3553, "step": 24931 }, { "epoch": 0.8928680143964761, "grad_norm": 1.63217294216156, "learning_rate": 5.95778213898488e-06, "loss": 1.382, "step": 24932 }, { "epoch": 0.8929038265260444, "grad_norm": 1.6627821922302246, "learning_rate": 5.9538390027123025e-06, "loss": 1.3492, "step": 24933 }, { "epoch": 0.8929396386556127, "grad_norm": 1.619146466255188, "learning_rate": 5.949897131709514e-06, "loss": 1.6053, "step": 24934 }, { "epoch": 0.8929754507851809, "grad_norm": 1.4433724880218506, "learning_rate": 5.94595652602955e-06, "loss": 1.4331, "step": 24935 }, { "epoch": 0.8930112629147492, "grad_norm": 1.6781110763549805, "learning_rate": 5.9420171857254126e-06, "loss": 1.6084, "step": 24936 }, { "epoch": 0.8930470750443175, "grad_norm": 1.698456883430481, "learning_rate": 5.938079110850114e-06, "loss": 1.4175, "step": 24937 }, { "epoch": 0.8930828871738858, "grad_norm": 2.07592511177063, "learning_rate": 5.934142301456613e-06, "loss": 1.6356, "step": 24938 }, { "epoch": 0.8931186993034541, "grad_norm": 1.7842410802841187, "learning_rate": 5.9302067575979115e-06, "loss": 1.3476, "step": 24939 }, { "epoch": 0.8931545114330224, "grad_norm": 1.0301146507263184, "learning_rate": 5.926272479326922e-06, "loss": 1.4211, "step": 24940 }, { "epoch": 0.8931903235625906, "grad_norm": 1.4850590229034424, "learning_rate": 5.922339466696591e-06, "loss": 1.1895, "step": 24941 }, { "epoch": 0.8932261356921589, "grad_norm": 1.8592171669006348, "learning_rate": 5.918407719759844e-06, "loss": 1.6507, "step": 24942 }, { "epoch": 0.8932619478217272, "grad_norm": 1.9317047595977783, "learning_rate": 5.914477238569549e-06, "loss": 1.2963, "step": 24943 }, { "epoch": 0.8932977599512955, "grad_norm": 2.370089292526245, "learning_rate": 5.91054802317862e-06, "loss": 1.7081, "step": 24944 }, { "epoch": 0.8933335720808638, "grad_norm": 2.0202910900115967, "learning_rate": 5.906620073639868e-06, "loss": 1.5157, "step": 24945 }, { "epoch": 0.8933693842104321, "grad_norm": 1.8120547533035278, "learning_rate": 5.902693390006209e-06, "loss": 1.6128, "step": 24946 }, { "epoch": 0.8934051963400004, "grad_norm": 1.7174150943756104, "learning_rate": 5.89876797233041e-06, "loss": 1.5787, "step": 24947 }, { "epoch": 0.8934410084695686, "grad_norm": 2.4649229049682617, "learning_rate": 5.894843820665319e-06, "loss": 1.7147, "step": 24948 }, { "epoch": 0.8934768205991369, "grad_norm": 1.6594107151031494, "learning_rate": 5.890920935063693e-06, "loss": 1.503, "step": 24949 }, { "epoch": 0.8935126327287052, "grad_norm": 2.6280345916748047, "learning_rate": 5.8869993155783675e-06, "loss": 1.2833, "step": 24950 }, { "epoch": 0.8935484448582734, "grad_norm": 1.1953978538513184, "learning_rate": 5.883078962262056e-06, "loss": 1.6845, "step": 24951 }, { "epoch": 0.8935842569878418, "grad_norm": 1.5348491668701172, "learning_rate": 5.879159875167517e-06, "loss": 1.4533, "step": 24952 }, { "epoch": 0.8936200691174101, "grad_norm": 1.3968113660812378, "learning_rate": 5.875242054347463e-06, "loss": 1.3589, "step": 24953 }, { "epoch": 0.8936558812469784, "grad_norm": 1.4299107789993286, "learning_rate": 5.871325499854618e-06, "loss": 1.2363, "step": 24954 }, { "epoch": 0.8936916933765466, "grad_norm": 1.3905788660049438, "learning_rate": 5.867410211741686e-06, "loss": 1.6219, "step": 24955 }, { "epoch": 0.8937275055061149, "grad_norm": 1.5503168106079102, "learning_rate": 5.863496190061302e-06, "loss": 1.6683, "step": 24956 }, { "epoch": 0.8937633176356832, "grad_norm": 2.048081159591675, "learning_rate": 5.859583434866167e-06, "loss": 1.3548, "step": 24957 }, { "epoch": 0.8937991297652514, "grad_norm": 1.719864010810852, "learning_rate": 5.855671946208896e-06, "loss": 1.3932, "step": 24958 }, { "epoch": 0.8938349418948198, "grad_norm": 1.683786153793335, "learning_rate": 5.851761724142147e-06, "loss": 1.3284, "step": 24959 }, { "epoch": 0.8938707540243881, "grad_norm": 1.8406994342803955, "learning_rate": 5.8478527687184755e-06, "loss": 1.3094, "step": 24960 }, { "epoch": 0.8939065661539564, "grad_norm": 1.4304991960525513, "learning_rate": 5.843945079990498e-06, "loss": 1.4482, "step": 24961 }, { "epoch": 0.8939423782835246, "grad_norm": 1.7396273612976074, "learning_rate": 5.840038658010805e-06, "loss": 1.2779, "step": 24962 }, { "epoch": 0.8939781904130929, "grad_norm": 1.7192277908325195, "learning_rate": 5.83613350283192e-06, "loss": 1.3281, "step": 24963 }, { "epoch": 0.8940140025426612, "grad_norm": 2.3847055435180664, "learning_rate": 5.83222961450639e-06, "loss": 1.6135, "step": 24964 }, { "epoch": 0.8940498146722294, "grad_norm": 2.184673309326172, "learning_rate": 5.828326993086741e-06, "loss": 1.3081, "step": 24965 }, { "epoch": 0.8940856268017978, "grad_norm": 1.49175226688385, "learning_rate": 5.824425638625508e-06, "loss": 1.1297, "step": 24966 }, { "epoch": 0.8941214389313661, "grad_norm": 1.4161489009857178, "learning_rate": 5.820525551175104e-06, "loss": 1.3759, "step": 24967 }, { "epoch": 0.8941572510609344, "grad_norm": 1.6693317890167236, "learning_rate": 5.8166267307880885e-06, "loss": 1.653, "step": 24968 }, { "epoch": 0.8941930631905026, "grad_norm": 1.2906261682510376, "learning_rate": 5.812729177516874e-06, "loss": 1.375, "step": 24969 }, { "epoch": 0.8942288753200709, "grad_norm": 1.3217500448226929, "learning_rate": 5.808832891413873e-06, "loss": 1.1146, "step": 24970 }, { "epoch": 0.8942646874496392, "grad_norm": 1.2847282886505127, "learning_rate": 5.804937872531524e-06, "loss": 1.1061, "step": 24971 }, { "epoch": 0.8943004995792074, "grad_norm": 1.6961958408355713, "learning_rate": 5.8010441209222384e-06, "loss": 1.4849, "step": 24972 }, { "epoch": 0.8943363117087758, "grad_norm": 1.6735053062438965, "learning_rate": 5.797151636638409e-06, "loss": 1.1823, "step": 24973 }, { "epoch": 0.8943721238383441, "grad_norm": 1.3020800352096558, "learning_rate": 5.7932604197323826e-06, "loss": 1.0688, "step": 24974 }, { "epoch": 0.8944079359679123, "grad_norm": 1.8242229223251343, "learning_rate": 5.789370470256517e-06, "loss": 1.2799, "step": 24975 }, { "epoch": 0.8944437480974806, "grad_norm": 1.5822815895080566, "learning_rate": 5.785481788263147e-06, "loss": 1.4551, "step": 24976 }, { "epoch": 0.8944795602270489, "grad_norm": 1.5726370811462402, "learning_rate": 5.7815943738046e-06, "loss": 1.1166, "step": 24977 }, { "epoch": 0.8945153723566172, "grad_norm": 1.3955146074295044, "learning_rate": 5.777708226933165e-06, "loss": 1.4049, "step": 24978 }, { "epoch": 0.8945511844861854, "grad_norm": 1.5697683095932007, "learning_rate": 5.773823347701124e-06, "loss": 1.5281, "step": 24979 }, { "epoch": 0.8945869966157538, "grad_norm": 1.8382532596588135, "learning_rate": 5.7699397361607564e-06, "loss": 1.2936, "step": 24980 }, { "epoch": 0.8946228087453221, "grad_norm": 1.5816271305084229, "learning_rate": 5.766057392364288e-06, "loss": 1.4626, "step": 24981 }, { "epoch": 0.8946586208748903, "grad_norm": 1.3491520881652832, "learning_rate": 5.7621763163639655e-06, "loss": 1.4557, "step": 24982 }, { "epoch": 0.8946944330044586, "grad_norm": 1.6579393148422241, "learning_rate": 5.758296508212013e-06, "loss": 1.3494, "step": 24983 }, { "epoch": 0.8947302451340269, "grad_norm": 1.8803688287734985, "learning_rate": 5.7544179679606234e-06, "loss": 1.9863, "step": 24984 }, { "epoch": 0.8947660572635951, "grad_norm": 1.9124130010604858, "learning_rate": 5.750540695661955e-06, "loss": 1.3755, "step": 24985 }, { "epoch": 0.8948018693931634, "grad_norm": 2.12723970413208, "learning_rate": 5.746664691368187e-06, "loss": 1.3116, "step": 24986 }, { "epoch": 0.8948376815227318, "grad_norm": 1.3383265733718872, "learning_rate": 5.742789955131489e-06, "loss": 1.3311, "step": 24987 }, { "epoch": 0.8948734936523001, "grad_norm": 2.026008367538452, "learning_rate": 5.7389164870039535e-06, "loss": 1.4909, "step": 24988 }, { "epoch": 0.8949093057818683, "grad_norm": 1.573212742805481, "learning_rate": 5.735044287037705e-06, "loss": 1.3221, "step": 24989 }, { "epoch": 0.8949451179114366, "grad_norm": 1.6352729797363281, "learning_rate": 5.7311733552848355e-06, "loss": 1.5761, "step": 24990 }, { "epoch": 0.8949809300410049, "grad_norm": 1.3640257120132446, "learning_rate": 5.727303691797459e-06, "loss": 1.2875, "step": 24991 }, { "epoch": 0.8950167421705731, "grad_norm": 2.3444406986236572, "learning_rate": 5.723435296627588e-06, "loss": 1.3894, "step": 24992 }, { "epoch": 0.8950525543001414, "grad_norm": 1.89201021194458, "learning_rate": 5.719568169827283e-06, "loss": 1.1353, "step": 24993 }, { "epoch": 0.8950883664297098, "grad_norm": 1.320304274559021, "learning_rate": 5.71570231144859e-06, "loss": 1.4152, "step": 24994 }, { "epoch": 0.8951241785592781, "grad_norm": 2.161402940750122, "learning_rate": 5.7118377215435e-06, "loss": 1.3054, "step": 24995 }, { "epoch": 0.8951599906888463, "grad_norm": 1.4582648277282715, "learning_rate": 5.7079744001640065e-06, "loss": 1.5407, "step": 24996 }, { "epoch": 0.8951958028184146, "grad_norm": 1.544952630996704, "learning_rate": 5.70411234736209e-06, "loss": 1.2509, "step": 24997 }, { "epoch": 0.8952316149479829, "grad_norm": 1.4822784662246704, "learning_rate": 5.700251563189718e-06, "loss": 1.6312, "step": 24998 }, { "epoch": 0.8952674270775511, "grad_norm": 2.0058014392852783, "learning_rate": 5.696392047698817e-06, "loss": 1.6654, "step": 24999 }, { "epoch": 0.8953032392071194, "grad_norm": 2.085139751434326, "learning_rate": 5.6925338009413136e-06, "loss": 1.3925, "step": 25000 }, { "epoch": 0.8953390513366878, "grad_norm": 1.3141660690307617, "learning_rate": 5.688676822969119e-06, "loss": 1.3244, "step": 25001 }, { "epoch": 0.895374863466256, "grad_norm": 1.5696529150009155, "learning_rate": 5.684821113834138e-06, "loss": 1.462, "step": 25002 }, { "epoch": 0.8954106755958243, "grad_norm": 1.421932578086853, "learning_rate": 5.680966673588217e-06, "loss": 1.2906, "step": 25003 }, { "epoch": 0.8954464877253926, "grad_norm": 2.3099539279937744, "learning_rate": 5.677113502283227e-06, "loss": 1.2604, "step": 25004 }, { "epoch": 0.8954822998549609, "grad_norm": 1.683809757232666, "learning_rate": 5.673261599971025e-06, "loss": 1.7746, "step": 25005 }, { "epoch": 0.8955181119845291, "grad_norm": 1.5996848344802856, "learning_rate": 5.669410966703393e-06, "loss": 1.1726, "step": 25006 }, { "epoch": 0.8955539241140974, "grad_norm": 1.568306803703308, "learning_rate": 5.665561602532165e-06, "loss": 1.6927, "step": 25007 }, { "epoch": 0.8955897362436658, "grad_norm": 1.9789416790008545, "learning_rate": 5.661713507509126e-06, "loss": 1.327, "step": 25008 }, { "epoch": 0.895625548373234, "grad_norm": 1.8262815475463867, "learning_rate": 5.657866681686053e-06, "loss": 1.6479, "step": 25009 }, { "epoch": 0.8956613605028023, "grad_norm": 1.8193621635437012, "learning_rate": 5.654021125114672e-06, "loss": 1.9024, "step": 25010 }, { "epoch": 0.8956971726323706, "grad_norm": 2.2621870040893555, "learning_rate": 5.6501768378467546e-06, "loss": 1.37, "step": 25011 }, { "epoch": 0.8957329847619389, "grad_norm": 2.0445046424865723, "learning_rate": 5.646333819933991e-06, "loss": 1.2324, "step": 25012 }, { "epoch": 0.8957687968915071, "grad_norm": 2.021339178085327, "learning_rate": 5.642492071428118e-06, "loss": 1.6428, "step": 25013 }, { "epoch": 0.8958046090210754, "grad_norm": 2.2916150093078613, "learning_rate": 5.638651592380795e-06, "loss": 1.6459, "step": 25014 }, { "epoch": 0.8958404211506438, "grad_norm": 1.9969441890716553, "learning_rate": 5.63481238284368e-06, "loss": 1.1157, "step": 25015 }, { "epoch": 0.895876233280212, "grad_norm": 1.4788435697555542, "learning_rate": 5.630974442868475e-06, "loss": 1.5839, "step": 25016 }, { "epoch": 0.8959120454097803, "grad_norm": 1.4995172023773193, "learning_rate": 5.627137772506752e-06, "loss": 1.339, "step": 25017 }, { "epoch": 0.8959478575393486, "grad_norm": 1.5286364555358887, "learning_rate": 5.623302371810169e-06, "loss": 1.2414, "step": 25018 }, { "epoch": 0.8959836696689168, "grad_norm": 1.7818827629089355, "learning_rate": 5.619468240830306e-06, "loss": 1.4386, "step": 25019 }, { "epoch": 0.8960194817984851, "grad_norm": 2.339745044708252, "learning_rate": 5.615635379618778e-06, "loss": 1.3534, "step": 25020 }, { "epoch": 0.8960552939280534, "grad_norm": 1.4172555208206177, "learning_rate": 5.61180378822711e-06, "loss": 1.5372, "step": 25021 }, { "epoch": 0.8960911060576218, "grad_norm": 1.7020219564437866, "learning_rate": 5.607973466706873e-06, "loss": 1.2331, "step": 25022 }, { "epoch": 0.89612691818719, "grad_norm": 1.4076426029205322, "learning_rate": 5.604144415109614e-06, "loss": 1.187, "step": 25023 }, { "epoch": 0.8961627303167583, "grad_norm": 1.5349396467208862, "learning_rate": 5.600316633486802e-06, "loss": 1.3395, "step": 25024 }, { "epoch": 0.8961985424463266, "grad_norm": 2.8167307376861572, "learning_rate": 5.596490121889975e-06, "loss": 1.4081, "step": 25025 }, { "epoch": 0.8962343545758948, "grad_norm": 1.624639630317688, "learning_rate": 5.592664880370602e-06, "loss": 1.3656, "step": 25026 }, { "epoch": 0.8962701667054631, "grad_norm": 2.108041524887085, "learning_rate": 5.588840908980153e-06, "loss": 1.2949, "step": 25027 }, { "epoch": 0.8963059788350314, "grad_norm": 1.5697460174560547, "learning_rate": 5.585018207770054e-06, "loss": 1.7335, "step": 25028 }, { "epoch": 0.8963417909645998, "grad_norm": 1.555113434791565, "learning_rate": 5.581196776791752e-06, "loss": 1.3761, "step": 25029 }, { "epoch": 0.896377603094168, "grad_norm": 1.8838539123535156, "learning_rate": 5.5773766160966634e-06, "loss": 1.6462, "step": 25030 }, { "epoch": 0.8964134152237363, "grad_norm": 1.76872718334198, "learning_rate": 5.5735577257361785e-06, "loss": 1.1874, "step": 25031 }, { "epoch": 0.8964492273533046, "grad_norm": 1.5243675708770752, "learning_rate": 5.569740105761679e-06, "loss": 1.387, "step": 25032 }, { "epoch": 0.8964850394828728, "grad_norm": 1.6961188316345215, "learning_rate": 5.565923756224489e-06, "loss": 1.4076, "step": 25033 }, { "epoch": 0.8965208516124411, "grad_norm": 2.217806816101074, "learning_rate": 5.562108677176015e-06, "loss": 1.3709, "step": 25034 }, { "epoch": 0.8965566637420094, "grad_norm": 1.590507984161377, "learning_rate": 5.558294868667535e-06, "loss": 1.444, "step": 25035 }, { "epoch": 0.8965924758715778, "grad_norm": 1.7941250801086426, "learning_rate": 5.554482330750388e-06, "loss": 1.5058, "step": 25036 }, { "epoch": 0.896628288001146, "grad_norm": 1.739349365234375, "learning_rate": 5.550671063475832e-06, "loss": 1.4063, "step": 25037 }, { "epoch": 0.8966641001307143, "grad_norm": 1.7744945287704468, "learning_rate": 5.546861066895193e-06, "loss": 1.6937, "step": 25038 }, { "epoch": 0.8966999122602826, "grad_norm": 1.3879646062850952, "learning_rate": 5.543052341059707e-06, "loss": 1.3204, "step": 25039 }, { "epoch": 0.8967357243898508, "grad_norm": 1.5593581199645996, "learning_rate": 5.5392448860205785e-06, "loss": 1.4807, "step": 25040 }, { "epoch": 0.8967715365194191, "grad_norm": 2.365830659866333, "learning_rate": 5.535438701829088e-06, "loss": 1.6088, "step": 25041 }, { "epoch": 0.8968073486489874, "grad_norm": 2.006086587905884, "learning_rate": 5.5316337885364165e-06, "loss": 1.7096, "step": 25042 }, { "epoch": 0.8968431607785557, "grad_norm": 2.7186625003814697, "learning_rate": 5.527830146193758e-06, "loss": 1.707, "step": 25043 }, { "epoch": 0.896878972908124, "grad_norm": 1.4928075075149536, "learning_rate": 5.5240277748522694e-06, "loss": 1.0195, "step": 25044 }, { "epoch": 0.8969147850376923, "grad_norm": 2.0089306831359863, "learning_rate": 5.520226674563145e-06, "loss": 1.4221, "step": 25045 }, { "epoch": 0.8969505971672606, "grad_norm": 1.3066225051879883, "learning_rate": 5.516426845377476e-06, "loss": 1.3159, "step": 25046 }, { "epoch": 0.8969864092968288, "grad_norm": 1.5065250396728516, "learning_rate": 5.512628287346433e-06, "loss": 1.3258, "step": 25047 }, { "epoch": 0.8970222214263971, "grad_norm": 1.5593777894973755, "learning_rate": 5.5088310005210865e-06, "loss": 1.1823, "step": 25048 }, { "epoch": 0.8970580335559654, "grad_norm": 1.4893614053726196, "learning_rate": 5.505034984952529e-06, "loss": 1.4447, "step": 25049 }, { "epoch": 0.8970938456855337, "grad_norm": 1.4334756135940552, "learning_rate": 5.501240240691852e-06, "loss": 1.6804, "step": 25050 }, { "epoch": 0.897129657815102, "grad_norm": 1.6654868125915527, "learning_rate": 5.49744676779006e-06, "loss": 1.3822, "step": 25051 }, { "epoch": 0.8971654699446703, "grad_norm": 1.5863392353057861, "learning_rate": 5.4936545662982455e-06, "loss": 1.5038, "step": 25052 }, { "epoch": 0.8972012820742385, "grad_norm": 1.6635081768035889, "learning_rate": 5.4898636362674e-06, "loss": 1.2242, "step": 25053 }, { "epoch": 0.8972370942038068, "grad_norm": 1.979265570640564, "learning_rate": 5.486073977748541e-06, "loss": 1.1738, "step": 25054 }, { "epoch": 0.8972729063333751, "grad_norm": 1.9149836301803589, "learning_rate": 5.482285590792613e-06, "loss": 1.4048, "step": 25055 }, { "epoch": 0.8973087184629434, "grad_norm": 1.5873132944107056, "learning_rate": 5.478498475450644e-06, "loss": 1.4913, "step": 25056 }, { "epoch": 0.8973445305925117, "grad_norm": 1.8850102424621582, "learning_rate": 5.47471263177356e-06, "loss": 1.5126, "step": 25057 }, { "epoch": 0.89738034272208, "grad_norm": 1.7988669872283936, "learning_rate": 5.470928059812264e-06, "loss": 1.1521, "step": 25058 }, { "epoch": 0.8974161548516483, "grad_norm": 1.4897023439407349, "learning_rate": 5.467144759617704e-06, "loss": 1.529, "step": 25059 }, { "epoch": 0.8974519669812165, "grad_norm": 1.9250378608703613, "learning_rate": 5.463362731240773e-06, "loss": 1.5413, "step": 25060 }, { "epoch": 0.8974877791107848, "grad_norm": 1.302215576171875, "learning_rate": 5.4595819747323636e-06, "loss": 1.2577, "step": 25061 }, { "epoch": 0.8975235912403531, "grad_norm": 1.791074514389038, "learning_rate": 5.455802490143314e-06, "loss": 1.4449, "step": 25062 }, { "epoch": 0.8975594033699213, "grad_norm": 2.012291669845581, "learning_rate": 5.4520242775244925e-06, "loss": 1.218, "step": 25063 }, { "epoch": 0.8975952154994897, "grad_norm": 1.5739734172821045, "learning_rate": 5.4482473369267264e-06, "loss": 1.1135, "step": 25064 }, { "epoch": 0.897631027629058, "grad_norm": 1.5240012407302856, "learning_rate": 5.444471668400841e-06, "loss": 1.5621, "step": 25065 }, { "epoch": 0.8976668397586263, "grad_norm": 1.944368600845337, "learning_rate": 5.440697271997608e-06, "loss": 1.0039, "step": 25066 }, { "epoch": 0.8977026518881945, "grad_norm": 1.4575976133346558, "learning_rate": 5.436924147767819e-06, "loss": 1.547, "step": 25067 }, { "epoch": 0.8977384640177628, "grad_norm": 2.7136669158935547, "learning_rate": 5.433152295762256e-06, "loss": 1.402, "step": 25068 }, { "epoch": 0.8977742761473311, "grad_norm": 2.263066291809082, "learning_rate": 5.429381716031634e-06, "loss": 1.2947, "step": 25069 }, { "epoch": 0.8978100882768993, "grad_norm": 1.668306589126587, "learning_rate": 5.42561240862669e-06, "loss": 1.6464, "step": 25070 }, { "epoch": 0.8978459004064677, "grad_norm": 1.28193998336792, "learning_rate": 5.421844373598139e-06, "loss": 1.2046, "step": 25071 }, { "epoch": 0.897881712536036, "grad_norm": 1.325032353401184, "learning_rate": 5.418077610996686e-06, "loss": 1.4681, "step": 25072 }, { "epoch": 0.8979175246656043, "grad_norm": 1.4316115379333496, "learning_rate": 5.4143121208729885e-06, "loss": 1.579, "step": 25073 }, { "epoch": 0.8979533367951725, "grad_norm": 1.4661067724227905, "learning_rate": 5.410547903277707e-06, "loss": 1.2959, "step": 25074 }, { "epoch": 0.8979891489247408, "grad_norm": 1.6015124320983887, "learning_rate": 5.4067849582615124e-06, "loss": 1.5803, "step": 25075 }, { "epoch": 0.8980249610543091, "grad_norm": 1.9564155340194702, "learning_rate": 5.403023285874997e-06, "loss": 1.4142, "step": 25076 }, { "epoch": 0.8980607731838773, "grad_norm": 1.7000601291656494, "learning_rate": 5.399262886168777e-06, "loss": 1.4036, "step": 25077 }, { "epoch": 0.8980965853134457, "grad_norm": 2.1915886402130127, "learning_rate": 5.395503759193454e-06, "loss": 1.7342, "step": 25078 }, { "epoch": 0.898132397443014, "grad_norm": 1.5303142070770264, "learning_rate": 5.391745904999601e-06, "loss": 1.4086, "step": 25079 }, { "epoch": 0.8981682095725823, "grad_norm": 1.7111073732376099, "learning_rate": 5.387989323637765e-06, "loss": 1.5384, "step": 25080 }, { "epoch": 0.8982040217021505, "grad_norm": 1.3334734439849854, "learning_rate": 5.384234015158495e-06, "loss": 1.4625, "step": 25081 }, { "epoch": 0.8982398338317188, "grad_norm": 2.577977418899536, "learning_rate": 5.380479979612307e-06, "loss": 1.4246, "step": 25082 }, { "epoch": 0.8982756459612871, "grad_norm": 1.6402453184127808, "learning_rate": 5.376727217049726e-06, "loss": 1.4184, "step": 25083 }, { "epoch": 0.8983114580908553, "grad_norm": 1.353309154510498, "learning_rate": 5.372975727521201e-06, "loss": 1.2795, "step": 25084 }, { "epoch": 0.8983472702204237, "grad_norm": 1.778639793395996, "learning_rate": 5.369225511077236e-06, "loss": 1.47, "step": 25085 }, { "epoch": 0.898383082349992, "grad_norm": 1.7196940183639526, "learning_rate": 5.36547656776829e-06, "loss": 1.4584, "step": 25086 }, { "epoch": 0.8984188944795602, "grad_norm": 1.3862847089767456, "learning_rate": 5.36172889764478e-06, "loss": 1.7245, "step": 25087 }, { "epoch": 0.8984547066091285, "grad_norm": 1.9796829223632812, "learning_rate": 5.357982500757119e-06, "loss": 1.4531, "step": 25088 }, { "epoch": 0.8984905187386968, "grad_norm": 1.9251147508621216, "learning_rate": 5.354237377155735e-06, "loss": 1.4222, "step": 25089 }, { "epoch": 0.898526330868265, "grad_norm": 1.603308916091919, "learning_rate": 5.3504935268910095e-06, "loss": 1.2329, "step": 25090 }, { "epoch": 0.8985621429978333, "grad_norm": 1.7304174900054932, "learning_rate": 5.346750950013301e-06, "loss": 1.6948, "step": 25091 }, { "epoch": 0.8985979551274017, "grad_norm": 1.523800253868103, "learning_rate": 5.343009646572949e-06, "loss": 1.5058, "step": 25092 }, { "epoch": 0.89863376725697, "grad_norm": 1.9301091432571411, "learning_rate": 5.3392696166203345e-06, "loss": 1.6271, "step": 25093 }, { "epoch": 0.8986695793865382, "grad_norm": 1.908660650253296, "learning_rate": 5.335530860205718e-06, "loss": 1.2108, "step": 25094 }, { "epoch": 0.8987053915161065, "grad_norm": 1.437114953994751, "learning_rate": 5.331793377379435e-06, "loss": 1.5768, "step": 25095 }, { "epoch": 0.8987412036456748, "grad_norm": 1.4844969511032104, "learning_rate": 5.328057168191747e-06, "loss": 1.2316, "step": 25096 }, { "epoch": 0.898777015775243, "grad_norm": 1.9871184825897217, "learning_rate": 5.324322232692947e-06, "loss": 1.3748, "step": 25097 }, { "epoch": 0.8988128279048113, "grad_norm": 1.5690678358078003, "learning_rate": 5.32058857093326e-06, "loss": 1.5002, "step": 25098 }, { "epoch": 0.8988486400343797, "grad_norm": 2.270082712173462, "learning_rate": 5.316856182962926e-06, "loss": 1.6875, "step": 25099 }, { "epoch": 0.898884452163948, "grad_norm": 1.716614007949829, "learning_rate": 5.313125068832159e-06, "loss": 1.4479, "step": 25100 }, { "epoch": 0.8989202642935162, "grad_norm": 2.0033087730407715, "learning_rate": 5.309395228591174e-06, "loss": 1.2221, "step": 25101 }, { "epoch": 0.8989560764230845, "grad_norm": 1.4407968521118164, "learning_rate": 5.305666662290121e-06, "loss": 1.3376, "step": 25102 }, { "epoch": 0.8989918885526528, "grad_norm": 1.4618561267852783, "learning_rate": 5.30193936997917e-06, "loss": 1.6145, "step": 25103 }, { "epoch": 0.899027700682221, "grad_norm": 2.3181114196777344, "learning_rate": 5.298213351708492e-06, "loss": 1.4054, "step": 25104 }, { "epoch": 0.8990635128117893, "grad_norm": 1.5716500282287598, "learning_rate": 5.29448860752817e-06, "loss": 1.3271, "step": 25105 }, { "epoch": 0.8990993249413577, "grad_norm": 1.612802505493164, "learning_rate": 5.290765137488351e-06, "loss": 1.4278, "step": 25106 }, { "epoch": 0.899135137070926, "grad_norm": 1.4087631702423096, "learning_rate": 5.287042941639131e-06, "loss": 1.6085, "step": 25107 }, { "epoch": 0.8991709492004942, "grad_norm": 2.212252616882324, "learning_rate": 5.2833220200305785e-06, "loss": 1.8866, "step": 25108 }, { "epoch": 0.8992067613300625, "grad_norm": 1.4179940223693848, "learning_rate": 5.279602372712744e-06, "loss": 1.5334, "step": 25109 }, { "epoch": 0.8992425734596308, "grad_norm": 1.4929925203323364, "learning_rate": 5.275883999735676e-06, "loss": 1.3277, "step": 25110 }, { "epoch": 0.899278385589199, "grad_norm": 2.1377246379852295, "learning_rate": 5.272166901149423e-06, "loss": 1.1745, "step": 25111 }, { "epoch": 0.8993141977187673, "grad_norm": 1.96725332736969, "learning_rate": 5.2684510770039556e-06, "loss": 1.696, "step": 25112 }, { "epoch": 0.8993500098483357, "grad_norm": 2.2955400943756104, "learning_rate": 5.264736527349279e-06, "loss": 1.2985, "step": 25113 }, { "epoch": 0.899385821977904, "grad_norm": 1.76557195186615, "learning_rate": 5.261023252235386e-06, "loss": 1.5453, "step": 25114 }, { "epoch": 0.8994216341074722, "grad_norm": 1.7912245988845825, "learning_rate": 5.257311251712227e-06, "loss": 1.6088, "step": 25115 }, { "epoch": 0.8994574462370405, "grad_norm": 1.81741201877594, "learning_rate": 5.253600525829716e-06, "loss": 1.8192, "step": 25116 }, { "epoch": 0.8994932583666088, "grad_norm": 1.5481551885604858, "learning_rate": 5.249891074637803e-06, "loss": 1.2055, "step": 25117 }, { "epoch": 0.899529070496177, "grad_norm": 2.0782535076141357, "learning_rate": 5.2461828981863916e-06, "loss": 1.502, "step": 25118 }, { "epoch": 0.8995648826257453, "grad_norm": 1.717686653137207, "learning_rate": 5.2424759965253645e-06, "loss": 1.6172, "step": 25119 }, { "epoch": 0.8996006947553137, "grad_norm": 2.288208246231079, "learning_rate": 5.2387703697046045e-06, "loss": 1.2769, "step": 25120 }, { "epoch": 0.899636506884882, "grad_norm": 1.6821223497390747, "learning_rate": 5.235066017773926e-06, "loss": 1.1108, "step": 25121 }, { "epoch": 0.8996723190144502, "grad_norm": 1.282433271408081, "learning_rate": 5.2313629407832355e-06, "loss": 1.3598, "step": 25122 }, { "epoch": 0.8997081311440185, "grad_norm": 1.8116391897201538, "learning_rate": 5.227661138782281e-06, "loss": 1.7734, "step": 25123 }, { "epoch": 0.8997439432735868, "grad_norm": 1.6620230674743652, "learning_rate": 5.22396061182091e-06, "loss": 1.3506, "step": 25124 }, { "epoch": 0.899779755403155, "grad_norm": 1.840793251991272, "learning_rate": 5.220261359948897e-06, "loss": 1.705, "step": 25125 }, { "epoch": 0.8998155675327233, "grad_norm": 1.7171930074691772, "learning_rate": 5.216563383216022e-06, "loss": 1.5466, "step": 25126 }, { "epoch": 0.8998513796622917, "grad_norm": 1.6956909894943237, "learning_rate": 5.2128666816720015e-06, "loss": 1.7422, "step": 25127 }, { "epoch": 0.8998871917918599, "grad_norm": 2.0536861419677734, "learning_rate": 5.209171255366607e-06, "loss": 1.3849, "step": 25128 }, { "epoch": 0.8999230039214282, "grad_norm": 1.4225544929504395, "learning_rate": 5.205477104349554e-06, "loss": 1.457, "step": 25129 }, { "epoch": 0.8999588160509965, "grad_norm": 1.5567337274551392, "learning_rate": 5.2017842286705145e-06, "loss": 1.4507, "step": 25130 }, { "epoch": 0.8999946281805647, "grad_norm": 1.5060020685195923, "learning_rate": 5.198092628379192e-06, "loss": 1.6578, "step": 25131 }, { "epoch": 0.900030440310133, "grad_norm": 2.0625991821289062, "learning_rate": 5.194402303525225e-06, "loss": 1.5786, "step": 25132 }, { "epoch": 0.9000662524397013, "grad_norm": 1.730444073677063, "learning_rate": 5.190713254158319e-06, "loss": 1.2594, "step": 25133 }, { "epoch": 0.9001020645692697, "grad_norm": 1.809962272644043, "learning_rate": 5.187025480328056e-06, "loss": 1.4024, "step": 25134 }, { "epoch": 0.9001378766988379, "grad_norm": 1.6739686727523804, "learning_rate": 5.183338982084074e-06, "loss": 1.762, "step": 25135 }, { "epoch": 0.9001736888284062, "grad_norm": 1.4611245393753052, "learning_rate": 5.179653759475933e-06, "loss": 1.3352, "step": 25136 }, { "epoch": 0.9002095009579745, "grad_norm": 2.249504804611206, "learning_rate": 5.175969812553272e-06, "loss": 1.1882, "step": 25137 }, { "epoch": 0.9002453130875427, "grad_norm": 1.9460619688034058, "learning_rate": 5.172287141365628e-06, "loss": 1.3776, "step": 25138 }, { "epoch": 0.900281125217111, "grad_norm": 1.6175462007522583, "learning_rate": 5.168605745962507e-06, "loss": 1.5004, "step": 25139 }, { "epoch": 0.9003169373466793, "grad_norm": 1.803956151008606, "learning_rate": 5.164925626393502e-06, "loss": 1.5692, "step": 25140 }, { "epoch": 0.9003527494762477, "grad_norm": 1.1598551273345947, "learning_rate": 5.161246782708073e-06, "loss": 1.3922, "step": 25141 }, { "epoch": 0.9003885616058159, "grad_norm": 1.4358941316604614, "learning_rate": 5.15756921495576e-06, "loss": 1.4219, "step": 25142 }, { "epoch": 0.9004243737353842, "grad_norm": 1.7593889236450195, "learning_rate": 5.153892923185977e-06, "loss": 1.4586, "step": 25143 }, { "epoch": 0.9004601858649525, "grad_norm": 1.4885627031326294, "learning_rate": 5.150217907448263e-06, "loss": 1.5213, "step": 25144 }, { "epoch": 0.9004959979945207, "grad_norm": 1.5121746063232422, "learning_rate": 5.146544167792011e-06, "loss": 1.3499, "step": 25145 }, { "epoch": 0.900531810124089, "grad_norm": 1.7629821300506592, "learning_rate": 5.1428717042666385e-06, "loss": 1.2924, "step": 25146 }, { "epoch": 0.9005676222536573, "grad_norm": 1.4092707633972168, "learning_rate": 5.1392005169215825e-06, "loss": 1.3375, "step": 25147 }, { "epoch": 0.9006034343832257, "grad_norm": 1.5190919637680054, "learning_rate": 5.1355306058062044e-06, "loss": 1.626, "step": 25148 }, { "epoch": 0.9006392465127939, "grad_norm": 1.983694314956665, "learning_rate": 5.13186197096992e-06, "loss": 1.2005, "step": 25149 }, { "epoch": 0.9006750586423622, "grad_norm": 1.4387457370758057, "learning_rate": 5.128194612462034e-06, "loss": 1.4169, "step": 25150 }, { "epoch": 0.9007108707719305, "grad_norm": 2.042126178741455, "learning_rate": 5.12452853033194e-06, "loss": 1.8489, "step": 25151 }, { "epoch": 0.9007466829014987, "grad_norm": 1.567436933517456, "learning_rate": 5.120863724628922e-06, "loss": 1.6615, "step": 25152 }, { "epoch": 0.900782495031067, "grad_norm": 1.5410945415496826, "learning_rate": 5.117200195402316e-06, "loss": 1.1489, "step": 25153 }, { "epoch": 0.9008183071606353, "grad_norm": 1.5345313549041748, "learning_rate": 5.113537942701363e-06, "loss": 1.6927, "step": 25154 }, { "epoch": 0.9008541192902035, "grad_norm": 1.8208950757980347, "learning_rate": 5.109876966575377e-06, "loss": 1.3238, "step": 25155 }, { "epoch": 0.9008899314197719, "grad_norm": 1.6119062900543213, "learning_rate": 5.106217267073598e-06, "loss": 1.6702, "step": 25156 }, { "epoch": 0.9009257435493402, "grad_norm": 1.6705838441848755, "learning_rate": 5.102558844245265e-06, "loss": 1.004, "step": 25157 }, { "epoch": 0.9009615556789085, "grad_norm": 1.38338041305542, "learning_rate": 5.09890169813958e-06, "loss": 1.4428, "step": 25158 }, { "epoch": 0.9009973678084767, "grad_norm": 1.6136221885681152, "learning_rate": 5.095245828805761e-06, "loss": 1.3344, "step": 25159 }, { "epoch": 0.901033179938045, "grad_norm": 1.925017237663269, "learning_rate": 5.091591236293003e-06, "loss": 1.5366, "step": 25160 }, { "epoch": 0.9010689920676133, "grad_norm": 1.5999585390090942, "learning_rate": 5.087937920650454e-06, "loss": 1.3421, "step": 25161 }, { "epoch": 0.9011048041971815, "grad_norm": 1.429749608039856, "learning_rate": 5.0842858819272644e-06, "loss": 1.6059, "step": 25162 }, { "epoch": 0.9011406163267499, "grad_norm": 1.6445558071136475, "learning_rate": 5.0806351201725944e-06, "loss": 1.3776, "step": 25163 }, { "epoch": 0.9011764284563182, "grad_norm": 1.7813189029693604, "learning_rate": 5.076985635435527e-06, "loss": 1.43, "step": 25164 }, { "epoch": 0.9012122405858864, "grad_norm": 1.552821159362793, "learning_rate": 5.073337427765179e-06, "loss": 1.5426, "step": 25165 }, { "epoch": 0.9012480527154547, "grad_norm": 1.7873458862304688, "learning_rate": 5.069690497210633e-06, "loss": 1.3311, "step": 25166 }, { "epoch": 0.901283864845023, "grad_norm": 1.6734850406646729, "learning_rate": 5.06604484382095e-06, "loss": 1.394, "step": 25167 }, { "epoch": 0.9013196769745913, "grad_norm": 1.4095349311828613, "learning_rate": 5.062400467645178e-06, "loss": 1.2577, "step": 25168 }, { "epoch": 0.9013554891041595, "grad_norm": 1.764383316040039, "learning_rate": 5.058757368732336e-06, "loss": 1.3601, "step": 25169 }, { "epoch": 0.9013913012337279, "grad_norm": 1.9552977085113525, "learning_rate": 5.055115547131462e-06, "loss": 1.6717, "step": 25170 }, { "epoch": 0.9014271133632962, "grad_norm": 1.2154018878936768, "learning_rate": 5.051475002891537e-06, "loss": 1.1555, "step": 25171 }, { "epoch": 0.9014629254928644, "grad_norm": 2.186868906021118, "learning_rate": 5.047835736061535e-06, "loss": 1.2166, "step": 25172 }, { "epoch": 0.9014987376224327, "grad_norm": 1.7284328937530518, "learning_rate": 5.044197746690427e-06, "loss": 1.236, "step": 25173 }, { "epoch": 0.901534549752001, "grad_norm": 1.353801965713501, "learning_rate": 5.040561034827163e-06, "loss": 1.3683, "step": 25174 }, { "epoch": 0.9015703618815692, "grad_norm": 1.7040997743606567, "learning_rate": 5.036925600520648e-06, "loss": 1.4311, "step": 25175 }, { "epoch": 0.9016061740111375, "grad_norm": 1.961835503578186, "learning_rate": 5.0332914438197984e-06, "loss": 1.1263, "step": 25176 }, { "epoch": 0.9016419861407059, "grad_norm": 1.8802769184112549, "learning_rate": 5.029658564773521e-06, "loss": 1.5408, "step": 25177 }, { "epoch": 0.9016777982702742, "grad_norm": 1.668648600578308, "learning_rate": 5.026026963430697e-06, "loss": 1.2913, "step": 25178 }, { "epoch": 0.9017136103998424, "grad_norm": 1.8066505193710327, "learning_rate": 5.022396639840166e-06, "loss": 1.5094, "step": 25179 }, { "epoch": 0.9017494225294107, "grad_norm": 1.4674502611160278, "learning_rate": 5.018767594050766e-06, "loss": 1.4396, "step": 25180 }, { "epoch": 0.901785234658979, "grad_norm": 1.7392138242721558, "learning_rate": 5.015139826111348e-06, "loss": 1.8019, "step": 25181 }, { "epoch": 0.9018210467885472, "grad_norm": 1.5244306325912476, "learning_rate": 5.0115133360706945e-06, "loss": 1.6719, "step": 25182 }, { "epoch": 0.9018568589181155, "grad_norm": 1.4753122329711914, "learning_rate": 5.0078881239776e-06, "loss": 1.09, "step": 25183 }, { "epoch": 0.9018926710476839, "grad_norm": 1.8521908521652222, "learning_rate": 5.0042641898808364e-06, "loss": 1.6567, "step": 25184 }, { "epoch": 0.9019284831772522, "grad_norm": 1.287305235862732, "learning_rate": 5.000641533829176e-06, "loss": 1.3805, "step": 25185 }, { "epoch": 0.9019642953068204, "grad_norm": 1.5233205556869507, "learning_rate": 4.9970201558713345e-06, "loss": 1.5226, "step": 25186 }, { "epoch": 0.9020001074363887, "grad_norm": 1.4274674654006958, "learning_rate": 4.99340005605603e-06, "loss": 1.4153, "step": 25187 }, { "epoch": 0.902035919565957, "grad_norm": 1.5530496835708618, "learning_rate": 4.98978123443199e-06, "loss": 1.6574, "step": 25188 }, { "epoch": 0.9020717316955252, "grad_norm": 1.6646677255630493, "learning_rate": 4.986163691047896e-06, "loss": 1.177, "step": 25189 }, { "epoch": 0.9021075438250935, "grad_norm": 1.5656930208206177, "learning_rate": 4.982547425952399e-06, "loss": 1.5505, "step": 25190 }, { "epoch": 0.9021433559546619, "grad_norm": 1.369274377822876, "learning_rate": 4.9789324391941615e-06, "loss": 1.3201, "step": 25191 }, { "epoch": 0.9021791680842302, "grad_norm": 1.6000550985336304, "learning_rate": 4.975318730821843e-06, "loss": 1.2981, "step": 25192 }, { "epoch": 0.9022149802137984, "grad_norm": 1.3364497423171997, "learning_rate": 4.971706300884016e-06, "loss": 1.3928, "step": 25193 }, { "epoch": 0.9022507923433667, "grad_norm": 1.555607557296753, "learning_rate": 4.9680951494292975e-06, "loss": 1.4731, "step": 25194 }, { "epoch": 0.902286604472935, "grad_norm": 1.5311139822006226, "learning_rate": 4.964485276506281e-06, "loss": 1.416, "step": 25195 }, { "epoch": 0.9023224166025032, "grad_norm": 2.2002294063568115, "learning_rate": 4.960876682163551e-06, "loss": 1.5436, "step": 25196 }, { "epoch": 0.9023582287320715, "grad_norm": 1.4345999956130981, "learning_rate": 4.957269366449613e-06, "loss": 1.2206, "step": 25197 }, { "epoch": 0.9023940408616399, "grad_norm": 1.821091651916504, "learning_rate": 4.953663329413017e-06, "loss": 1.3636, "step": 25198 }, { "epoch": 0.9024298529912081, "grad_norm": 2.0151352882385254, "learning_rate": 4.950058571102289e-06, "loss": 1.5156, "step": 25199 }, { "epoch": 0.9024656651207764, "grad_norm": 2.489975929260254, "learning_rate": 4.946455091565916e-06, "loss": 1.3341, "step": 25200 }, { "epoch": 0.9025014772503447, "grad_norm": 1.6598349809646606, "learning_rate": 4.942852890852367e-06, "loss": 1.5651, "step": 25201 }, { "epoch": 0.902537289379913, "grad_norm": 1.6895561218261719, "learning_rate": 4.939251969010128e-06, "loss": 1.4153, "step": 25202 }, { "epoch": 0.9025731015094812, "grad_norm": 2.297715902328491, "learning_rate": 4.935652326087648e-06, "loss": 1.6323, "step": 25203 }, { "epoch": 0.9026089136390495, "grad_norm": 1.411095142364502, "learning_rate": 4.932053962133321e-06, "loss": 1.3637, "step": 25204 }, { "epoch": 0.9026447257686179, "grad_norm": 1.6230731010437012, "learning_rate": 4.928456877195586e-06, "loss": 1.3658, "step": 25205 }, { "epoch": 0.9026805378981861, "grad_norm": 2.2268002033233643, "learning_rate": 4.924861071322817e-06, "loss": 1.602, "step": 25206 }, { "epoch": 0.9027163500277544, "grad_norm": 1.9682823419570923, "learning_rate": 4.92126654456343e-06, "loss": 1.4974, "step": 25207 }, { "epoch": 0.9027521621573227, "grad_norm": 1.9790271520614624, "learning_rate": 4.917673296965741e-06, "loss": 1.635, "step": 25208 }, { "epoch": 0.902787974286891, "grad_norm": 1.872644066810608, "learning_rate": 4.914081328578113e-06, "loss": 1.2021, "step": 25209 }, { "epoch": 0.9028237864164592, "grad_norm": 2.065763235092163, "learning_rate": 4.910490639448884e-06, "loss": 1.497, "step": 25210 }, { "epoch": 0.9028595985460275, "grad_norm": 1.9289997816085815, "learning_rate": 4.906901229626326e-06, "loss": 1.7655, "step": 25211 }, { "epoch": 0.9028954106755959, "grad_norm": 1.6984025239944458, "learning_rate": 4.903313099158757e-06, "loss": 1.4767, "step": 25212 }, { "epoch": 0.9029312228051641, "grad_norm": 1.4198287725448608, "learning_rate": 4.8997262480944385e-06, "loss": 1.3188, "step": 25213 }, { "epoch": 0.9029670349347324, "grad_norm": 1.8730920553207397, "learning_rate": 4.896140676481653e-06, "loss": 1.0944, "step": 25214 }, { "epoch": 0.9030028470643007, "grad_norm": 1.4049091339111328, "learning_rate": 4.892556384368607e-06, "loss": 1.4781, "step": 25215 }, { "epoch": 0.9030386591938689, "grad_norm": 1.9881552457809448, "learning_rate": 4.8889733718035295e-06, "loss": 1.507, "step": 25216 }, { "epoch": 0.9030744713234372, "grad_norm": 1.4669917821884155, "learning_rate": 4.885391638834646e-06, "loss": 1.2178, "step": 25217 }, { "epoch": 0.9031102834530055, "grad_norm": 2.0855116844177246, "learning_rate": 4.88181118551011e-06, "loss": 1.1911, "step": 25218 }, { "epoch": 0.9031460955825739, "grad_norm": 1.5934829711914062, "learning_rate": 4.878232011878136e-06, "loss": 1.4744, "step": 25219 }, { "epoch": 0.9031819077121421, "grad_norm": 2.0535361766815186, "learning_rate": 4.874654117986821e-06, "loss": 1.5789, "step": 25220 }, { "epoch": 0.9032177198417104, "grad_norm": 1.54163658618927, "learning_rate": 4.871077503884358e-06, "loss": 1.5965, "step": 25221 }, { "epoch": 0.9032535319712787, "grad_norm": 1.860435962677002, "learning_rate": 4.86750216961882e-06, "loss": 1.2296, "step": 25222 }, { "epoch": 0.9032893441008469, "grad_norm": 1.6049903631210327, "learning_rate": 4.863928115238336e-06, "loss": 1.3, "step": 25223 }, { "epoch": 0.9033251562304152, "grad_norm": 1.5540401935577393, "learning_rate": 4.860355340790978e-06, "loss": 1.128, "step": 25224 }, { "epoch": 0.9033609683599835, "grad_norm": 1.7751628160476685, "learning_rate": 4.85678384632483e-06, "loss": 1.462, "step": 25225 }, { "epoch": 0.9033967804895519, "grad_norm": 2.4625301361083984, "learning_rate": 4.8532136318879315e-06, "loss": 1.7404, "step": 25226 }, { "epoch": 0.9034325926191201, "grad_norm": 1.419631838798523, "learning_rate": 4.8496446975282885e-06, "loss": 1.5578, "step": 25227 }, { "epoch": 0.9034684047486884, "grad_norm": 1.7044492959976196, "learning_rate": 4.846077043293973e-06, "loss": 1.1315, "step": 25228 }, { "epoch": 0.9035042168782567, "grad_norm": 1.758029818534851, "learning_rate": 4.842510669232925e-06, "loss": 1.3711, "step": 25229 }, { "epoch": 0.9035400290078249, "grad_norm": 2.1265738010406494, "learning_rate": 4.8389455753931726e-06, "loss": 1.2978, "step": 25230 }, { "epoch": 0.9035758411373932, "grad_norm": 1.8212213516235352, "learning_rate": 4.835381761822633e-06, "loss": 1.5178, "step": 25231 }, { "epoch": 0.9036116532669615, "grad_norm": 1.3521546125411987, "learning_rate": 4.831819228569301e-06, "loss": 1.1085, "step": 25232 }, { "epoch": 0.9036474653965298, "grad_norm": 1.5324915647506714, "learning_rate": 4.828257975681072e-06, "loss": 1.7123, "step": 25233 }, { "epoch": 0.9036832775260981, "grad_norm": 2.087040901184082, "learning_rate": 4.824698003205863e-06, "loss": 1.4467, "step": 25234 }, { "epoch": 0.9037190896556664, "grad_norm": 1.3491966724395752, "learning_rate": 4.8211393111915915e-06, "loss": 1.2438, "step": 25235 }, { "epoch": 0.9037549017852347, "grad_norm": 1.7652667760849, "learning_rate": 4.817581899686108e-06, "loss": 1.6547, "step": 25236 }, { "epoch": 0.9037907139148029, "grad_norm": 1.6787374019622803, "learning_rate": 4.814025768737296e-06, "loss": 1.2073, "step": 25237 }, { "epoch": 0.9038265260443712, "grad_norm": 1.5756950378417969, "learning_rate": 4.810470918392962e-06, "loss": 1.3766, "step": 25238 }, { "epoch": 0.9038623381739395, "grad_norm": 1.498803973197937, "learning_rate": 4.8069173487009785e-06, "loss": 1.3329, "step": 25239 }, { "epoch": 0.9038981503035078, "grad_norm": 1.668433427810669, "learning_rate": 4.803365059709131e-06, "loss": 1.4101, "step": 25240 }, { "epoch": 0.9039339624330761, "grad_norm": 1.7079111337661743, "learning_rate": 4.799814051465212e-06, "loss": 1.6624, "step": 25241 }, { "epoch": 0.9039697745626444, "grad_norm": 1.6564069986343384, "learning_rate": 4.7962643240169854e-06, "loss": 1.2615, "step": 25242 }, { "epoch": 0.9040055866922126, "grad_norm": 1.8872987031936646, "learning_rate": 4.792715877412213e-06, "loss": 1.494, "step": 25243 }, { "epoch": 0.9040413988217809, "grad_norm": 1.6588809490203857, "learning_rate": 4.789168711698655e-06, "loss": 1.5164, "step": 25244 }, { "epoch": 0.9040772109513492, "grad_norm": 1.7760930061340332, "learning_rate": 4.785622826924019e-06, "loss": 1.64, "step": 25245 }, { "epoch": 0.9041130230809175, "grad_norm": 1.5673993825912476, "learning_rate": 4.782078223135999e-06, "loss": 1.6067, "step": 25246 }, { "epoch": 0.9041488352104858, "grad_norm": 1.5011767148971558, "learning_rate": 4.778534900382292e-06, "loss": 1.441, "step": 25247 }, { "epoch": 0.9041846473400541, "grad_norm": 1.4967342615127563, "learning_rate": 4.774992858710581e-06, "loss": 1.2915, "step": 25248 }, { "epoch": 0.9042204594696224, "grad_norm": 1.8950409889221191, "learning_rate": 4.771452098168494e-06, "loss": 1.6408, "step": 25249 }, { "epoch": 0.9042562715991906, "grad_norm": 1.2963480949401855, "learning_rate": 4.767912618803705e-06, "loss": 1.1706, "step": 25250 }, { "epoch": 0.9042920837287589, "grad_norm": 1.2724201679229736, "learning_rate": 4.764374420663808e-06, "loss": 1.3548, "step": 25251 }, { "epoch": 0.9043278958583272, "grad_norm": 2.205700159072876, "learning_rate": 4.7608375037964e-06, "loss": 1.2672, "step": 25252 }, { "epoch": 0.9043637079878954, "grad_norm": 1.8525762557983398, "learning_rate": 4.757301868249076e-06, "loss": 1.2589, "step": 25253 }, { "epoch": 0.9043995201174638, "grad_norm": 1.4828543663024902, "learning_rate": 4.753767514069396e-06, "loss": 1.5784, "step": 25254 }, { "epoch": 0.9044353322470321, "grad_norm": 1.468957543373108, "learning_rate": 4.750234441304924e-06, "loss": 1.479, "step": 25255 }, { "epoch": 0.9044711443766004, "grad_norm": 1.6229407787322998, "learning_rate": 4.746702650003176e-06, "loss": 1.2722, "step": 25256 }, { "epoch": 0.9045069565061686, "grad_norm": 2.103856086730957, "learning_rate": 4.743172140211683e-06, "loss": 1.305, "step": 25257 }, { "epoch": 0.9045427686357369, "grad_norm": 1.640134572982788, "learning_rate": 4.7396429119779265e-06, "loss": 1.3592, "step": 25258 }, { "epoch": 0.9045785807653052, "grad_norm": 1.485317349433899, "learning_rate": 4.736114965349414e-06, "loss": 1.4747, "step": 25259 }, { "epoch": 0.9046143928948734, "grad_norm": 1.4240471124649048, "learning_rate": 4.732588300373586e-06, "loss": 1.2655, "step": 25260 }, { "epoch": 0.9046502050244418, "grad_norm": 1.6139092445373535, "learning_rate": 4.729062917097882e-06, "loss": 1.0557, "step": 25261 }, { "epoch": 0.9046860171540101, "grad_norm": 2.000537633895874, "learning_rate": 4.725538815569774e-06, "loss": 1.6511, "step": 25262 }, { "epoch": 0.9047218292835784, "grad_norm": 1.554819941520691, "learning_rate": 4.722015995836626e-06, "loss": 1.5654, "step": 25263 }, { "epoch": 0.9047576414131466, "grad_norm": 2.3554747104644775, "learning_rate": 4.718494457945855e-06, "loss": 1.5196, "step": 25264 }, { "epoch": 0.9047934535427149, "grad_norm": 1.2216877937316895, "learning_rate": 4.714974201944833e-06, "loss": 1.3913, "step": 25265 }, { "epoch": 0.9048292656722832, "grad_norm": 1.3515695333480835, "learning_rate": 4.711455227880935e-06, "loss": 1.4451, "step": 25266 }, { "epoch": 0.9048650778018514, "grad_norm": 1.557699203491211, "learning_rate": 4.707937535801488e-06, "loss": 1.023, "step": 25267 }, { "epoch": 0.9049008899314198, "grad_norm": 1.7648674249649048, "learning_rate": 4.704421125753822e-06, "loss": 1.4819, "step": 25268 }, { "epoch": 0.9049367020609881, "grad_norm": 2.089319944381714, "learning_rate": 4.700905997785254e-06, "loss": 1.3192, "step": 25269 }, { "epoch": 0.9049725141905564, "grad_norm": 1.6241894960403442, "learning_rate": 4.697392151943059e-06, "loss": 1.2065, "step": 25270 }, { "epoch": 0.9050083263201246, "grad_norm": 1.50771164894104, "learning_rate": 4.693879588274519e-06, "loss": 1.334, "step": 25271 }, { "epoch": 0.9050441384496929, "grad_norm": 1.4906578063964844, "learning_rate": 4.690368306826898e-06, "loss": 1.547, "step": 25272 }, { "epoch": 0.9050799505792612, "grad_norm": 1.5478224754333496, "learning_rate": 4.686858307647446e-06, "loss": 1.4141, "step": 25273 }, { "epoch": 0.9051157627088294, "grad_norm": 1.7823578119277954, "learning_rate": 4.683349590783348e-06, "loss": 1.5894, "step": 25274 }, { "epoch": 0.9051515748383978, "grad_norm": 1.496527075767517, "learning_rate": 4.679842156281844e-06, "loss": 1.2226, "step": 25275 }, { "epoch": 0.9051873869679661, "grad_norm": 1.2077003717422485, "learning_rate": 4.676336004190096e-06, "loss": 1.1825, "step": 25276 }, { "epoch": 0.9052231990975343, "grad_norm": 1.6871800422668457, "learning_rate": 4.6728311345553115e-06, "loss": 1.2391, "step": 25277 }, { "epoch": 0.9052590112271026, "grad_norm": 1.8817455768585205, "learning_rate": 4.669327547424607e-06, "loss": 1.5434, "step": 25278 }, { "epoch": 0.9052948233566709, "grad_norm": 1.5902183055877686, "learning_rate": 4.665825242845134e-06, "loss": 1.4759, "step": 25279 }, { "epoch": 0.9053306354862392, "grad_norm": 1.7828710079193115, "learning_rate": 4.662324220864011e-06, "loss": 1.3709, "step": 25280 }, { "epoch": 0.9053664476158074, "grad_norm": 1.9528136253356934, "learning_rate": 4.658824481528335e-06, "loss": 1.3893, "step": 25281 }, { "epoch": 0.9054022597453758, "grad_norm": 2.271791934967041, "learning_rate": 4.655326024885198e-06, "loss": 1.5958, "step": 25282 }, { "epoch": 0.9054380718749441, "grad_norm": 1.7135944366455078, "learning_rate": 4.651828850981654e-06, "loss": 1.7616, "step": 25283 }, { "epoch": 0.9054738840045123, "grad_norm": 2.0489044189453125, "learning_rate": 4.6483329598647874e-06, "loss": 1.4232, "step": 25284 }, { "epoch": 0.9055096961340806, "grad_norm": 1.7245614528656006, "learning_rate": 4.644838351581582e-06, "loss": 1.6448, "step": 25285 }, { "epoch": 0.9055455082636489, "grad_norm": 1.375653624534607, "learning_rate": 4.6413450261790894e-06, "loss": 1.3573, "step": 25286 }, { "epoch": 0.9055813203932171, "grad_norm": 1.45327889919281, "learning_rate": 4.637852983704294e-06, "loss": 1.5115, "step": 25287 }, { "epoch": 0.9056171325227854, "grad_norm": 1.4247119426727295, "learning_rate": 4.63436222420417e-06, "loss": 1.488, "step": 25288 }, { "epoch": 0.9056529446523538, "grad_norm": 1.779223918914795, "learning_rate": 4.630872747725701e-06, "loss": 1.5267, "step": 25289 }, { "epoch": 0.9056887567819221, "grad_norm": 1.4207184314727783, "learning_rate": 4.627384554315806e-06, "loss": 1.3127, "step": 25290 }, { "epoch": 0.9057245689114903, "grad_norm": 1.3641339540481567, "learning_rate": 4.623897644021446e-06, "loss": 1.1716, "step": 25291 }, { "epoch": 0.9057603810410586, "grad_norm": 1.7971229553222656, "learning_rate": 4.6204120168895085e-06, "loss": 1.7832, "step": 25292 }, { "epoch": 0.9057961931706269, "grad_norm": 1.6615642309188843, "learning_rate": 4.616927672966898e-06, "loss": 1.2579, "step": 25293 }, { "epoch": 0.9058320053001951, "grad_norm": 1.582353949546814, "learning_rate": 4.6134446123004885e-06, "loss": 1.2602, "step": 25294 }, { "epoch": 0.9058678174297634, "grad_norm": 1.9554626941680908, "learning_rate": 4.609962834937153e-06, "loss": 1.2429, "step": 25295 }, { "epoch": 0.9059036295593318, "grad_norm": 1.6370717287063599, "learning_rate": 4.606482340923712e-06, "loss": 1.3997, "step": 25296 }, { "epoch": 0.9059394416889001, "grad_norm": 1.5962556600570679, "learning_rate": 4.6030031303070045e-06, "loss": 1.5322, "step": 25297 }, { "epoch": 0.9059752538184683, "grad_norm": 1.372740387916565, "learning_rate": 4.599525203133848e-06, "loss": 1.2582, "step": 25298 }, { "epoch": 0.9060110659480366, "grad_norm": 1.5764113664627075, "learning_rate": 4.596048559451005e-06, "loss": 1.4412, "step": 25299 }, { "epoch": 0.9060468780776049, "grad_norm": 1.6428806781768799, "learning_rate": 4.592573199305272e-06, "loss": 1.5331, "step": 25300 }, { "epoch": 0.9060826902071731, "grad_norm": 2.063018560409546, "learning_rate": 4.58909912274339e-06, "loss": 1.4299, "step": 25301 }, { "epoch": 0.9061185023367414, "grad_norm": 1.7012205123901367, "learning_rate": 4.585626329812132e-06, "loss": 1.6607, "step": 25302 }, { "epoch": 0.9061543144663098, "grad_norm": 1.4773954153060913, "learning_rate": 4.582154820558182e-06, "loss": 1.2731, "step": 25303 }, { "epoch": 0.906190126595878, "grad_norm": 1.781326413154602, "learning_rate": 4.5786845950282486e-06, "loss": 1.2667, "step": 25304 }, { "epoch": 0.9062259387254463, "grad_norm": 1.574438452720642, "learning_rate": 4.575215653269061e-06, "loss": 1.4534, "step": 25305 }, { "epoch": 0.9062617508550146, "grad_norm": 1.2895307540893555, "learning_rate": 4.571747995327224e-06, "loss": 1.4442, "step": 25306 }, { "epoch": 0.9062975629845829, "grad_norm": 1.6740641593933105, "learning_rate": 4.568281621249437e-06, "loss": 1.269, "step": 25307 }, { "epoch": 0.9063333751141511, "grad_norm": 2.1443235874176025, "learning_rate": 4.564816531082316e-06, "loss": 1.6352, "step": 25308 }, { "epoch": 0.9063691872437194, "grad_norm": 1.7951942682266235, "learning_rate": 4.561352724872503e-06, "loss": 1.3721, "step": 25309 }, { "epoch": 0.9064049993732878, "grad_norm": 1.5101568698883057, "learning_rate": 4.557890202666571e-06, "loss": 1.2677, "step": 25310 }, { "epoch": 0.906440811502856, "grad_norm": 1.488024353981018, "learning_rate": 4.5544289645111145e-06, "loss": 1.3814, "step": 25311 }, { "epoch": 0.9064766236324243, "grad_norm": 1.546871304512024, "learning_rate": 4.5509690104526995e-06, "loss": 1.1841, "step": 25312 }, { "epoch": 0.9065124357619926, "grad_norm": 1.6033934354782104, "learning_rate": 4.547510340537886e-06, "loss": 1.4059, "step": 25313 }, { "epoch": 0.9065482478915609, "grad_norm": 1.8143798112869263, "learning_rate": 4.544052954813194e-06, "loss": 1.2067, "step": 25314 }, { "epoch": 0.9065840600211291, "grad_norm": 1.500676155090332, "learning_rate": 4.540596853325119e-06, "loss": 1.2754, "step": 25315 }, { "epoch": 0.9066198721506974, "grad_norm": 1.7909812927246094, "learning_rate": 4.537142036120212e-06, "loss": 1.3087, "step": 25316 }, { "epoch": 0.9066556842802658, "grad_norm": 1.3690929412841797, "learning_rate": 4.533688503244893e-06, "loss": 1.2588, "step": 25317 }, { "epoch": 0.906691496409834, "grad_norm": 1.785294532775879, "learning_rate": 4.5302362547456565e-06, "loss": 1.3569, "step": 25318 }, { "epoch": 0.9067273085394023, "grad_norm": 1.5315146446228027, "learning_rate": 4.5267852906689555e-06, "loss": 1.4828, "step": 25319 }, { "epoch": 0.9067631206689706, "grad_norm": 1.328297734260559, "learning_rate": 4.523335611061208e-06, "loss": 1.5227, "step": 25320 }, { "epoch": 0.9067989327985388, "grad_norm": 1.671176791191101, "learning_rate": 4.51988721596881e-06, "loss": 1.3934, "step": 25321 }, { "epoch": 0.9068347449281071, "grad_norm": 1.6849404573440552, "learning_rate": 4.51644010543818e-06, "loss": 1.3083, "step": 25322 }, { "epoch": 0.9068705570576754, "grad_norm": 1.6850334405899048, "learning_rate": 4.512994279515692e-06, "loss": 1.5317, "step": 25323 }, { "epoch": 0.9069063691872438, "grad_norm": 1.4295352697372437, "learning_rate": 4.509549738247676e-06, "loss": 1.378, "step": 25324 }, { "epoch": 0.906942181316812, "grad_norm": 2.1510770320892334, "learning_rate": 4.5061064816805165e-06, "loss": 1.4521, "step": 25325 }, { "epoch": 0.9069779934463803, "grad_norm": 1.9867370128631592, "learning_rate": 4.502664509860488e-06, "loss": 1.4834, "step": 25326 }, { "epoch": 0.9070138055759486, "grad_norm": 1.2240649461746216, "learning_rate": 4.499223822833942e-06, "loss": 1.3368, "step": 25327 }, { "epoch": 0.9070496177055168, "grad_norm": 1.844240665435791, "learning_rate": 4.4957844206471535e-06, "loss": 1.3575, "step": 25328 }, { "epoch": 0.9070854298350851, "grad_norm": 1.304735779762268, "learning_rate": 4.492346303346395e-06, "loss": 1.1249, "step": 25329 }, { "epoch": 0.9071212419646534, "grad_norm": 1.8168039321899414, "learning_rate": 4.488909470977909e-06, "loss": 1.3287, "step": 25330 }, { "epoch": 0.9071570540942218, "grad_norm": 1.2686028480529785, "learning_rate": 4.485473923587957e-06, "loss": 1.5049, "step": 25331 }, { "epoch": 0.90719286622379, "grad_norm": 1.6690572500228882, "learning_rate": 4.482039661222759e-06, "loss": 1.43, "step": 25332 }, { "epoch": 0.9072286783533583, "grad_norm": 1.5077745914459229, "learning_rate": 4.478606683928476e-06, "loss": 1.6013, "step": 25333 }, { "epoch": 0.9072644904829266, "grad_norm": 1.62128484249115, "learning_rate": 4.475174991751352e-06, "loss": 1.1891, "step": 25334 }, { "epoch": 0.9073003026124948, "grad_norm": 1.4367783069610596, "learning_rate": 4.471744584737525e-06, "loss": 1.5777, "step": 25335 }, { "epoch": 0.9073361147420631, "grad_norm": 2.2138447761535645, "learning_rate": 4.468315462933159e-06, "loss": 1.4724, "step": 25336 }, { "epoch": 0.9073719268716314, "grad_norm": 1.362805724143982, "learning_rate": 4.464887626384362e-06, "loss": 1.3973, "step": 25337 }, { "epoch": 0.9074077390011998, "grad_norm": 1.5935183763504028, "learning_rate": 4.461461075137285e-06, "loss": 1.6762, "step": 25338 }, { "epoch": 0.907443551130768, "grad_norm": 1.413731575012207, "learning_rate": 4.458035809238026e-06, "loss": 1.4027, "step": 25339 }, { "epoch": 0.9074793632603363, "grad_norm": 1.4532414674758911, "learning_rate": 4.454611828732636e-06, "loss": 1.385, "step": 25340 }, { "epoch": 0.9075151753899046, "grad_norm": 1.9259858131408691, "learning_rate": 4.4511891336671885e-06, "loss": 1.6331, "step": 25341 }, { "epoch": 0.9075509875194728, "grad_norm": 1.7889636754989624, "learning_rate": 4.447767724087759e-06, "loss": 1.1333, "step": 25342 }, { "epoch": 0.9075867996490411, "grad_norm": 1.47054922580719, "learning_rate": 4.444347600040366e-06, "loss": 1.4389, "step": 25343 }, { "epoch": 0.9076226117786094, "grad_norm": 1.6787251234054565, "learning_rate": 4.440928761570995e-06, "loss": 1.1389, "step": 25344 }, { "epoch": 0.9076584239081777, "grad_norm": 1.4817570447921753, "learning_rate": 4.4375112087256864e-06, "loss": 1.1824, "step": 25345 }, { "epoch": 0.907694236037746, "grad_norm": 2.3525238037109375, "learning_rate": 4.434094941550393e-06, "loss": 1.3532, "step": 25346 }, { "epoch": 0.9077300481673143, "grad_norm": 1.610073208808899, "learning_rate": 4.430679960091089e-06, "loss": 1.6939, "step": 25347 }, { "epoch": 0.9077658602968826, "grad_norm": 2.0152218341827393, "learning_rate": 4.427266264393693e-06, "loss": 1.2251, "step": 25348 }, { "epoch": 0.9078016724264508, "grad_norm": 1.7770185470581055, "learning_rate": 4.423853854504156e-06, "loss": 1.3707, "step": 25349 }, { "epoch": 0.9078374845560191, "grad_norm": 2.1795554161071777, "learning_rate": 4.420442730468388e-06, "loss": 1.2333, "step": 25350 }, { "epoch": 0.9078732966855874, "grad_norm": 1.550978660583496, "learning_rate": 4.417032892332263e-06, "loss": 1.528, "step": 25351 }, { "epoch": 0.9079091088151557, "grad_norm": 1.501692533493042, "learning_rate": 4.413624340141676e-06, "loss": 1.2372, "step": 25352 }, { "epoch": 0.907944920944724, "grad_norm": 1.6500540971755981, "learning_rate": 4.410217073942468e-06, "loss": 1.4474, "step": 25353 }, { "epoch": 0.9079807330742923, "grad_norm": 1.5187865495681763, "learning_rate": 4.4068110937805055e-06, "loss": 1.1773, "step": 25354 }, { "epoch": 0.9080165452038605, "grad_norm": 1.3688457012176514, "learning_rate": 4.40340639970157e-06, "loss": 1.2746, "step": 25355 }, { "epoch": 0.9080523573334288, "grad_norm": 1.6650950908660889, "learning_rate": 4.400002991751495e-06, "loss": 1.6117, "step": 25356 }, { "epoch": 0.9080881694629971, "grad_norm": 1.6353851556777954, "learning_rate": 4.396600869976086e-06, "loss": 1.2558, "step": 25357 }, { "epoch": 0.9081239815925654, "grad_norm": 1.6381622552871704, "learning_rate": 4.393200034421074e-06, "loss": 1.4325, "step": 25358 }, { "epoch": 0.9081597937221337, "grad_norm": 2.1839332580566406, "learning_rate": 4.3898004851322335e-06, "loss": 1.5382, "step": 25359 }, { "epoch": 0.908195605851702, "grad_norm": 1.539484977722168, "learning_rate": 4.386402222155295e-06, "loss": 1.6129, "step": 25360 }, { "epoch": 0.9082314179812703, "grad_norm": 1.4183225631713867, "learning_rate": 4.383005245535998e-06, "loss": 1.3707, "step": 25361 }, { "epoch": 0.9082672301108385, "grad_norm": 1.6400363445281982, "learning_rate": 4.379609555320008e-06, "loss": 0.9586, "step": 25362 }, { "epoch": 0.9083030422404068, "grad_norm": 1.407865047454834, "learning_rate": 4.376215151553042e-06, "loss": 1.3569, "step": 25363 }, { "epoch": 0.9083388543699751, "grad_norm": 1.3456802368164062, "learning_rate": 4.372822034280744e-06, "loss": 0.978, "step": 25364 }, { "epoch": 0.9083746664995433, "grad_norm": 2.59515380859375, "learning_rate": 4.3694302035487965e-06, "loss": 1.2745, "step": 25365 }, { "epoch": 0.9084104786291117, "grad_norm": 1.7671518325805664, "learning_rate": 4.366039659402798e-06, "loss": 1.4961, "step": 25366 }, { "epoch": 0.90844629075868, "grad_norm": 2.6446104049682617, "learning_rate": 4.362650401888369e-06, "loss": 1.2699, "step": 25367 }, { "epoch": 0.9084821028882483, "grad_norm": 1.3323699235916138, "learning_rate": 4.359262431051137e-06, "loss": 1.4321, "step": 25368 }, { "epoch": 0.9085179150178165, "grad_norm": 1.728229284286499, "learning_rate": 4.355875746936644e-06, "loss": 1.5239, "step": 25369 }, { "epoch": 0.9085537271473848, "grad_norm": 1.4732418060302734, "learning_rate": 4.352490349590477e-06, "loss": 1.4759, "step": 25370 }, { "epoch": 0.9085895392769531, "grad_norm": 1.4603962898254395, "learning_rate": 4.349106239058165e-06, "loss": 1.6496, "step": 25371 }, { "epoch": 0.9086253514065213, "grad_norm": 1.721230149269104, "learning_rate": 4.345723415385272e-06, "loss": 1.7606, "step": 25372 }, { "epoch": 0.9086611635360897, "grad_norm": 1.6769062280654907, "learning_rate": 4.342341878617262e-06, "loss": 1.7661, "step": 25373 }, { "epoch": 0.908696975665658, "grad_norm": 2.0991551876068115, "learning_rate": 4.338961628799665e-06, "loss": 1.2793, "step": 25374 }, { "epoch": 0.9087327877952263, "grad_norm": 1.581172227859497, "learning_rate": 4.335582665977944e-06, "loss": 1.4508, "step": 25375 }, { "epoch": 0.9087685999247945, "grad_norm": 1.404631495475769, "learning_rate": 4.332204990197564e-06, "loss": 1.4273, "step": 25376 }, { "epoch": 0.9088044120543628, "grad_norm": 1.1999675035476685, "learning_rate": 4.328828601503943e-06, "loss": 1.2674, "step": 25377 }, { "epoch": 0.9088402241839311, "grad_norm": 1.3518636226654053, "learning_rate": 4.325453499942545e-06, "loss": 1.3804, "step": 25378 }, { "epoch": 0.9088760363134993, "grad_norm": 1.5245866775512695, "learning_rate": 4.322079685558755e-06, "loss": 1.2018, "step": 25379 }, { "epoch": 0.9089118484430677, "grad_norm": 1.940627932548523, "learning_rate": 4.318707158397972e-06, "loss": 1.5514, "step": 25380 }, { "epoch": 0.908947660572636, "grad_norm": 1.9791686534881592, "learning_rate": 4.3153359185055474e-06, "loss": 1.3945, "step": 25381 }, { "epoch": 0.9089834727022043, "grad_norm": 2.057812452316284, "learning_rate": 4.311965965926867e-06, "loss": 1.1848, "step": 25382 }, { "epoch": 0.9090192848317725, "grad_norm": 1.4718939065933228, "learning_rate": 4.308597300707262e-06, "loss": 1.4995, "step": 25383 }, { "epoch": 0.9090550969613408, "grad_norm": 1.5255402326583862, "learning_rate": 4.305229922892029e-06, "loss": 1.418, "step": 25384 }, { "epoch": 0.9090909090909091, "grad_norm": 1.564485788345337, "learning_rate": 4.301863832526498e-06, "loss": 1.1865, "step": 25385 }, { "epoch": 0.9091267212204773, "grad_norm": 1.8961577415466309, "learning_rate": 4.298499029655967e-06, "loss": 1.4887, "step": 25386 }, { "epoch": 0.9091625333500457, "grad_norm": 1.5364948511123657, "learning_rate": 4.295135514325654e-06, "loss": 1.5881, "step": 25387 }, { "epoch": 0.909198345479614, "grad_norm": 2.002300977706909, "learning_rate": 4.291773286580858e-06, "loss": 1.6364, "step": 25388 }, { "epoch": 0.9092341576091822, "grad_norm": 2.0127434730529785, "learning_rate": 4.288412346466797e-06, "loss": 1.3339, "step": 25389 }, { "epoch": 0.9092699697387505, "grad_norm": 1.5489211082458496, "learning_rate": 4.28505269402869e-06, "loss": 1.3824, "step": 25390 }, { "epoch": 0.9093057818683188, "grad_norm": 1.6152890920639038, "learning_rate": 4.281694329311736e-06, "loss": 1.3801, "step": 25391 }, { "epoch": 0.909341593997887, "grad_norm": 2.6852145195007324, "learning_rate": 4.278337252361109e-06, "loss": 1.5435, "step": 25392 }, { "epoch": 0.9093774061274553, "grad_norm": 2.5400712490081787, "learning_rate": 4.2749814632219946e-06, "loss": 1.4603, "step": 25393 }, { "epoch": 0.9094132182570237, "grad_norm": 2.176903486251831, "learning_rate": 4.271626961939524e-06, "loss": 1.4666, "step": 25394 }, { "epoch": 0.909449030386592, "grad_norm": 1.6950504779815674, "learning_rate": 4.268273748558815e-06, "loss": 1.3297, "step": 25395 }, { "epoch": 0.9094848425161602, "grad_norm": 1.5055739879608154, "learning_rate": 4.264921823125012e-06, "loss": 1.4014, "step": 25396 }, { "epoch": 0.9095206546457285, "grad_norm": 1.2444562911987305, "learning_rate": 4.261571185683211e-06, "loss": 1.4675, "step": 25397 }, { "epoch": 0.9095564667752968, "grad_norm": 2.0132944583892822, "learning_rate": 4.258221836278453e-06, "loss": 1.6443, "step": 25398 }, { "epoch": 0.909592278904865, "grad_norm": 1.8058809041976929, "learning_rate": 4.2548737749558255e-06, "loss": 1.347, "step": 25399 }, { "epoch": 0.9096280910344333, "grad_norm": 1.5747233629226685, "learning_rate": 4.2515270017603695e-06, "loss": 1.6271, "step": 25400 }, { "epoch": 0.9096639031640017, "grad_norm": 1.754483699798584, "learning_rate": 4.248181516737127e-06, "loss": 1.4832, "step": 25401 }, { "epoch": 0.90969971529357, "grad_norm": 1.453294038772583, "learning_rate": 4.244837319931072e-06, "loss": 1.5246, "step": 25402 }, { "epoch": 0.9097355274231382, "grad_norm": 1.683763027191162, "learning_rate": 4.2414944113872255e-06, "loss": 1.3641, "step": 25403 }, { "epoch": 0.9097713395527065, "grad_norm": 1.6361896991729736, "learning_rate": 4.2381527911505625e-06, "loss": 1.8167, "step": 25404 }, { "epoch": 0.9098071516822748, "grad_norm": 1.424181580543518, "learning_rate": 4.234812459266013e-06, "loss": 1.5335, "step": 25405 }, { "epoch": 0.909842963811843, "grad_norm": 2.2374167442321777, "learning_rate": 4.231473415778531e-06, "loss": 1.4462, "step": 25406 }, { "epoch": 0.9098787759414113, "grad_norm": 1.852335810661316, "learning_rate": 4.228135660733046e-06, "loss": 1.648, "step": 25407 }, { "epoch": 0.9099145880709797, "grad_norm": 2.4960708618164062, "learning_rate": 4.224799194174467e-06, "loss": 1.3632, "step": 25408 }, { "epoch": 0.909950400200548, "grad_norm": 1.384843349456787, "learning_rate": 4.221464016147669e-06, "loss": 1.5738, "step": 25409 }, { "epoch": 0.9099862123301162, "grad_norm": 1.7134809494018555, "learning_rate": 4.218130126697517e-06, "loss": 1.5194, "step": 25410 }, { "epoch": 0.9100220244596845, "grad_norm": 1.2886848449707031, "learning_rate": 4.214797525868897e-06, "loss": 1.2627, "step": 25411 }, { "epoch": 0.9100578365892528, "grad_norm": 1.9155702590942383, "learning_rate": 4.2114662137066055e-06, "loss": 1.8567, "step": 25412 }, { "epoch": 0.910093648718821, "grad_norm": 1.4528340101242065, "learning_rate": 4.208136190255485e-06, "loss": 1.4138, "step": 25413 }, { "epoch": 0.9101294608483893, "grad_norm": 1.6072170734405518, "learning_rate": 4.204807455560311e-06, "loss": 1.4734, "step": 25414 }, { "epoch": 0.9101652729779577, "grad_norm": 1.2965353727340698, "learning_rate": 4.201480009665915e-06, "loss": 1.3501, "step": 25415 }, { "epoch": 0.910201085107526, "grad_norm": 1.2253295183181763, "learning_rate": 4.198153852617015e-06, "loss": 1.2337, "step": 25416 }, { "epoch": 0.9102368972370942, "grad_norm": 2.0200018882751465, "learning_rate": 4.194828984458376e-06, "loss": 1.5565, "step": 25417 }, { "epoch": 0.9102727093666625, "grad_norm": 1.6863843202590942, "learning_rate": 4.191505405234741e-06, "loss": 1.4327, "step": 25418 }, { "epoch": 0.9103085214962308, "grad_norm": 1.7367192506790161, "learning_rate": 4.188183114990829e-06, "loss": 1.4891, "step": 25419 }, { "epoch": 0.910344333625799, "grad_norm": 2.484041452407837, "learning_rate": 4.1848621137713154e-06, "loss": 1.5881, "step": 25420 }, { "epoch": 0.9103801457553673, "grad_norm": 1.6672288179397583, "learning_rate": 4.181542401620875e-06, "loss": 1.2391, "step": 25421 }, { "epoch": 0.9104159578849357, "grad_norm": 1.5806713104248047, "learning_rate": 4.178223978584206e-06, "loss": 1.6583, "step": 25422 }, { "epoch": 0.910451770014504, "grad_norm": 1.8388357162475586, "learning_rate": 4.174906844705917e-06, "loss": 1.246, "step": 25423 }, { "epoch": 0.9104875821440722, "grad_norm": 1.3316192626953125, "learning_rate": 4.171591000030672e-06, "loss": 1.4997, "step": 25424 }, { "epoch": 0.9105233942736405, "grad_norm": 1.8472051620483398, "learning_rate": 4.168276444603026e-06, "loss": 1.4753, "step": 25425 }, { "epoch": 0.9105592064032088, "grad_norm": 1.7919559478759766, "learning_rate": 4.164963178467629e-06, "loss": 1.5098, "step": 25426 }, { "epoch": 0.910595018532777, "grad_norm": 1.5775041580200195, "learning_rate": 4.161651201669036e-06, "loss": 1.531, "step": 25427 }, { "epoch": 0.9106308306623453, "grad_norm": 2.2557876110076904, "learning_rate": 4.1583405142517906e-06, "loss": 1.5838, "step": 25428 }, { "epoch": 0.9106666427919137, "grad_norm": 1.9684809446334839, "learning_rate": 4.155031116260466e-06, "loss": 1.2069, "step": 25429 }, { "epoch": 0.9107024549214819, "grad_norm": 1.215849757194519, "learning_rate": 4.15172300773955e-06, "loss": 1.4853, "step": 25430 }, { "epoch": 0.9107382670510502, "grad_norm": 1.6601389646530151, "learning_rate": 4.148416188733584e-06, "loss": 1.3532, "step": 25431 }, { "epoch": 0.9107740791806185, "grad_norm": 1.467976450920105, "learning_rate": 4.1451106592869995e-06, "loss": 1.3732, "step": 25432 }, { "epoch": 0.9108098913101867, "grad_norm": 1.4758421182632446, "learning_rate": 4.14180641944435e-06, "loss": 1.4902, "step": 25433 }, { "epoch": 0.910845703439755, "grad_norm": 1.355146884918213, "learning_rate": 4.138503469250021e-06, "loss": 1.3033, "step": 25434 }, { "epoch": 0.9108815155693233, "grad_norm": 1.4203933477401733, "learning_rate": 4.13520180874849e-06, "loss": 1.3695, "step": 25435 }, { "epoch": 0.9109173276988917, "grad_norm": 2.0352320671081543, "learning_rate": 4.131901437984153e-06, "loss": 1.651, "step": 25436 }, { "epoch": 0.9109531398284599, "grad_norm": 1.4367496967315674, "learning_rate": 4.128602357001421e-06, "loss": 1.3241, "step": 25437 }, { "epoch": 0.9109889519580282, "grad_norm": 1.3705084323883057, "learning_rate": 4.12530456584469e-06, "loss": 1.5884, "step": 25438 }, { "epoch": 0.9110247640875965, "grad_norm": 1.4604884386062622, "learning_rate": 4.122008064558313e-06, "loss": 1.295, "step": 25439 }, { "epoch": 0.9110605762171647, "grad_norm": 1.8145109415054321, "learning_rate": 4.118712853186634e-06, "loss": 1.5224, "step": 25440 }, { "epoch": 0.911096388346733, "grad_norm": 1.4539945125579834, "learning_rate": 4.115418931773996e-06, "loss": 1.6827, "step": 25441 }, { "epoch": 0.9111322004763013, "grad_norm": 1.368727445602417, "learning_rate": 4.112126300364727e-06, "loss": 1.4317, "step": 25442 }, { "epoch": 0.9111680126058697, "grad_norm": 1.5504074096679688, "learning_rate": 4.108834959003094e-06, "loss": 1.4278, "step": 25443 }, { "epoch": 0.9112038247354379, "grad_norm": 1.5621628761291504, "learning_rate": 4.1055449077334165e-06, "loss": 1.2978, "step": 25444 }, { "epoch": 0.9112396368650062, "grad_norm": 1.6070566177368164, "learning_rate": 4.102256146599936e-06, "loss": 1.158, "step": 25445 }, { "epoch": 0.9112754489945745, "grad_norm": 2.3168065547943115, "learning_rate": 4.098968675646886e-06, "loss": 1.3911, "step": 25446 }, { "epoch": 0.9113112611241427, "grad_norm": 1.7678879499435425, "learning_rate": 4.095682494918507e-06, "loss": 1.1655, "step": 25447 }, { "epoch": 0.911347073253711, "grad_norm": 2.47255539894104, "learning_rate": 4.092397604459019e-06, "loss": 1.484, "step": 25448 }, { "epoch": 0.9113828853832793, "grad_norm": 1.4481755495071411, "learning_rate": 4.089114004312622e-06, "loss": 1.3727, "step": 25449 }, { "epoch": 0.9114186975128477, "grad_norm": 1.532240390777588, "learning_rate": 4.085831694523456e-06, "loss": 1.5036, "step": 25450 }, { "epoch": 0.9114545096424159, "grad_norm": 1.6303825378417969, "learning_rate": 4.082550675135721e-06, "loss": 1.2653, "step": 25451 }, { "epoch": 0.9114903217719842, "grad_norm": 2.0714213848114014, "learning_rate": 4.079270946193525e-06, "loss": 1.2285, "step": 25452 }, { "epoch": 0.9115261339015525, "grad_norm": 2.202430486679077, "learning_rate": 4.075992507741033e-06, "loss": 1.3034, "step": 25453 }, { "epoch": 0.9115619460311207, "grad_norm": 2.01389741897583, "learning_rate": 4.07271535982231e-06, "loss": 1.6597, "step": 25454 }, { "epoch": 0.911597758160689, "grad_norm": 1.577217698097229, "learning_rate": 4.0694395024814754e-06, "loss": 1.4017, "step": 25455 }, { "epoch": 0.9116335702902573, "grad_norm": 1.35993230342865, "learning_rate": 4.066164935762595e-06, "loss": 1.406, "step": 25456 }, { "epoch": 0.9116693824198256, "grad_norm": 1.9778926372528076, "learning_rate": 4.062891659709711e-06, "loss": 1.3003, "step": 25457 }, { "epoch": 0.9117051945493939, "grad_norm": 1.6684564352035522, "learning_rate": 4.059619674366866e-06, "loss": 1.5999, "step": 25458 }, { "epoch": 0.9117410066789622, "grad_norm": 2.0282845497131348, "learning_rate": 4.05634897977808e-06, "loss": 1.3524, "step": 25459 }, { "epoch": 0.9117768188085305, "grad_norm": 2.274070978164673, "learning_rate": 4.053079575987384e-06, "loss": 1.3631, "step": 25460 }, { "epoch": 0.9118126309380987, "grad_norm": 1.6663633584976196, "learning_rate": 4.049811463038722e-06, "loss": 1.5455, "step": 25461 }, { "epoch": 0.911848443067667, "grad_norm": 1.4195297956466675, "learning_rate": 4.0465446409760795e-06, "loss": 1.4995, "step": 25462 }, { "epoch": 0.9118842551972353, "grad_norm": 1.5641138553619385, "learning_rate": 4.043279109843412e-06, "loss": 1.4892, "step": 25463 }, { "epoch": 0.9119200673268036, "grad_norm": 1.4835494756698608, "learning_rate": 4.04001486968465e-06, "loss": 1.4001, "step": 25464 }, { "epoch": 0.9119558794563719, "grad_norm": 1.8441171646118164, "learning_rate": 4.036751920543702e-06, "loss": 1.6182, "step": 25465 }, { "epoch": 0.9119916915859402, "grad_norm": 2.2617461681365967, "learning_rate": 4.033490262464468e-06, "loss": 1.2301, "step": 25466 }, { "epoch": 0.9120275037155084, "grad_norm": 1.4524611234664917, "learning_rate": 4.030229895490856e-06, "loss": 1.5047, "step": 25467 }, { "epoch": 0.9120633158450767, "grad_norm": 1.821845531463623, "learning_rate": 4.026970819666698e-06, "loss": 1.2156, "step": 25468 }, { "epoch": 0.912099127974645, "grad_norm": 1.517996072769165, "learning_rate": 4.023713035035836e-06, "loss": 1.4621, "step": 25469 }, { "epoch": 0.9121349401042133, "grad_norm": 2.0149309635162354, "learning_rate": 4.020456541642126e-06, "loss": 1.4772, "step": 25470 }, { "epoch": 0.9121707522337816, "grad_norm": 1.5749101638793945, "learning_rate": 4.017201339529386e-06, "loss": 1.3722, "step": 25471 }, { "epoch": 0.9122065643633499, "grad_norm": 1.673729658126831, "learning_rate": 4.013947428741372e-06, "loss": 1.6295, "step": 25472 }, { "epoch": 0.9122423764929182, "grad_norm": 1.4756914377212524, "learning_rate": 4.01069480932188e-06, "loss": 1.5144, "step": 25473 }, { "epoch": 0.9122781886224864, "grad_norm": 1.3629252910614014, "learning_rate": 4.007443481314699e-06, "loss": 1.3908, "step": 25474 }, { "epoch": 0.9123140007520547, "grad_norm": 1.5156805515289307, "learning_rate": 4.0041934447635156e-06, "loss": 1.3617, "step": 25475 }, { "epoch": 0.912349812881623, "grad_norm": 1.570890188217163, "learning_rate": 4.000944699712094e-06, "loss": 1.2795, "step": 25476 }, { "epoch": 0.9123856250111912, "grad_norm": 1.760048508644104, "learning_rate": 3.997697246204124e-06, "loss": 1.4411, "step": 25477 }, { "epoch": 0.9124214371407596, "grad_norm": 1.7072031497955322, "learning_rate": 3.994451084283324e-06, "loss": 1.4699, "step": 25478 }, { "epoch": 0.9124572492703279, "grad_norm": 1.4796770811080933, "learning_rate": 3.991206213993326e-06, "loss": 1.326, "step": 25479 }, { "epoch": 0.9124930613998962, "grad_norm": 1.6207607984542847, "learning_rate": 3.987962635377806e-06, "loss": 1.2875, "step": 25480 }, { "epoch": 0.9125288735294644, "grad_norm": 1.4966524839401245, "learning_rate": 3.98472034848042e-06, "loss": 1.3703, "step": 25481 }, { "epoch": 0.9125646856590327, "grad_norm": 1.418821096420288, "learning_rate": 3.9814793533447635e-06, "loss": 1.2243, "step": 25482 }, { "epoch": 0.912600497788601, "grad_norm": 1.7261139154434204, "learning_rate": 3.978239650014437e-06, "loss": 1.4594, "step": 25483 }, { "epoch": 0.9126363099181692, "grad_norm": 1.719135046005249, "learning_rate": 3.975001238533038e-06, "loss": 1.5768, "step": 25484 }, { "epoch": 0.9126721220477376, "grad_norm": 1.6824172735214233, "learning_rate": 3.971764118944155e-06, "loss": 1.45, "step": 25485 }, { "epoch": 0.9127079341773059, "grad_norm": 1.4959074258804321, "learning_rate": 3.968528291291296e-06, "loss": 1.1142, "step": 25486 }, { "epoch": 0.9127437463068742, "grad_norm": 1.5593359470367432, "learning_rate": 3.965293755618027e-06, "loss": 1.3833, "step": 25487 }, { "epoch": 0.9127795584364424, "grad_norm": 1.45145845413208, "learning_rate": 3.962060511967846e-06, "loss": 1.1292, "step": 25488 }, { "epoch": 0.9128153705660107, "grad_norm": 1.793111801147461, "learning_rate": 3.9588285603842755e-06, "loss": 1.384, "step": 25489 }, { "epoch": 0.912851182695579, "grad_norm": 1.5112842321395874, "learning_rate": 3.955597900910768e-06, "loss": 1.4585, "step": 25490 }, { "epoch": 0.9128869948251472, "grad_norm": 1.887883186340332, "learning_rate": 3.9523685335908e-06, "loss": 1.5188, "step": 25491 }, { "epoch": 0.9129228069547156, "grad_norm": 1.8775691986083984, "learning_rate": 3.9491404584678485e-06, "loss": 1.4276, "step": 25492 }, { "epoch": 0.9129586190842839, "grad_norm": 1.926689863204956, "learning_rate": 3.945913675585289e-06, "loss": 1.4073, "step": 25493 }, { "epoch": 0.9129944312138522, "grad_norm": 1.671813726425171, "learning_rate": 3.9426881849865646e-06, "loss": 1.4181, "step": 25494 }, { "epoch": 0.9130302433434204, "grad_norm": 1.5096828937530518, "learning_rate": 3.939463986715064e-06, "loss": 1.4566, "step": 25495 }, { "epoch": 0.9130660554729887, "grad_norm": 1.3902485370635986, "learning_rate": 3.936241080814174e-06, "loss": 1.2241, "step": 25496 }, { "epoch": 0.913101867602557, "grad_norm": 1.5125646591186523, "learning_rate": 3.933019467327248e-06, "loss": 1.548, "step": 25497 }, { "epoch": 0.9131376797321252, "grad_norm": 1.8359897136688232, "learning_rate": 3.9297991462976196e-06, "loss": 1.5648, "step": 25498 }, { "epoch": 0.9131734918616936, "grad_norm": 1.5971086025238037, "learning_rate": 3.92658011776863e-06, "loss": 1.3288, "step": 25499 }, { "epoch": 0.9132093039912619, "grad_norm": 2.038939952850342, "learning_rate": 3.923362381783568e-06, "loss": 1.2699, "step": 25500 }, { "epoch": 0.9132451161208301, "grad_norm": 2.618192195892334, "learning_rate": 3.920145938385744e-06, "loss": 1.3613, "step": 25501 }, { "epoch": 0.9132809282503984, "grad_norm": 1.5217925310134888, "learning_rate": 3.916930787618412e-06, "loss": 1.46, "step": 25502 }, { "epoch": 0.9133167403799667, "grad_norm": 2.0505995750427246, "learning_rate": 3.913716929524857e-06, "loss": 1.23, "step": 25503 }, { "epoch": 0.913352552509535, "grad_norm": 1.67502760887146, "learning_rate": 3.910504364148282e-06, "loss": 1.3907, "step": 25504 }, { "epoch": 0.9133883646391032, "grad_norm": 1.720183253288269, "learning_rate": 3.907293091531927e-06, "loss": 1.1388, "step": 25505 }, { "epoch": 0.9134241767686716, "grad_norm": 1.7315115928649902, "learning_rate": 3.904083111718993e-06, "loss": 1.2282, "step": 25506 }, { "epoch": 0.9134599888982399, "grad_norm": 1.504867672920227, "learning_rate": 3.900874424752677e-06, "loss": 1.4048, "step": 25507 }, { "epoch": 0.9134958010278081, "grad_norm": 1.2139843702316284, "learning_rate": 3.897667030676133e-06, "loss": 1.311, "step": 25508 }, { "epoch": 0.9135316131573764, "grad_norm": 1.6019784212112427, "learning_rate": 3.8944609295324955e-06, "loss": 1.394, "step": 25509 }, { "epoch": 0.9135674252869447, "grad_norm": 1.34242582321167, "learning_rate": 3.89125612136495e-06, "loss": 1.1476, "step": 25510 }, { "epoch": 0.913603237416513, "grad_norm": 1.3961800336837769, "learning_rate": 3.888052606216564e-06, "loss": 1.481, "step": 25511 }, { "epoch": 0.9136390495460812, "grad_norm": 1.6185824871063232, "learning_rate": 3.884850384130456e-06, "loss": 1.1685, "step": 25512 }, { "epoch": 0.9136748616756496, "grad_norm": 2.0552761554718018, "learning_rate": 3.881649455149694e-06, "loss": 1.3966, "step": 25513 }, { "epoch": 0.9137106738052179, "grad_norm": 1.247956395149231, "learning_rate": 3.878449819317376e-06, "loss": 1.1779, "step": 25514 }, { "epoch": 0.9137464859347861, "grad_norm": 1.512967586517334, "learning_rate": 3.875251476676522e-06, "loss": 1.199, "step": 25515 }, { "epoch": 0.9137822980643544, "grad_norm": 1.5441175699234009, "learning_rate": 3.872054427270167e-06, "loss": 1.108, "step": 25516 }, { "epoch": 0.9138181101939227, "grad_norm": 2.1521899700164795, "learning_rate": 3.868858671141329e-06, "loss": 1.6401, "step": 25517 }, { "epoch": 0.9138539223234909, "grad_norm": 2.341466188430786, "learning_rate": 3.865664208332986e-06, "loss": 1.3783, "step": 25518 }, { "epoch": 0.9138897344530592, "grad_norm": 1.7183805704116821, "learning_rate": 3.862471038888138e-06, "loss": 1.2954, "step": 25519 }, { "epoch": 0.9139255465826276, "grad_norm": 1.7073190212249756, "learning_rate": 3.859279162849716e-06, "loss": 1.6382, "step": 25520 }, { "epoch": 0.9139613587121959, "grad_norm": 1.8298503160476685, "learning_rate": 3.856088580260697e-06, "loss": 1.6447, "step": 25521 }, { "epoch": 0.9139971708417641, "grad_norm": 1.38832688331604, "learning_rate": 3.8528992911639806e-06, "loss": 1.3147, "step": 25522 }, { "epoch": 0.9140329829713324, "grad_norm": 1.9283418655395508, "learning_rate": 3.8497112956024875e-06, "loss": 1.2106, "step": 25523 }, { "epoch": 0.9140687951009007, "grad_norm": 1.6704370975494385, "learning_rate": 3.846524593619094e-06, "loss": 1.6686, "step": 25524 }, { "epoch": 0.9141046072304689, "grad_norm": 2.005181074142456, "learning_rate": 3.8433391852567e-06, "loss": 1.9029, "step": 25525 }, { "epoch": 0.9141404193600372, "grad_norm": 1.8579343557357788, "learning_rate": 3.840155070558149e-06, "loss": 1.2547, "step": 25526 }, { "epoch": 0.9141762314896056, "grad_norm": 1.4794865846633911, "learning_rate": 3.836972249566239e-06, "loss": 1.1888, "step": 25527 }, { "epoch": 0.9142120436191739, "grad_norm": 1.6024411916732788, "learning_rate": 3.83379072232386e-06, "loss": 1.2923, "step": 25528 }, { "epoch": 0.9142478557487421, "grad_norm": 1.8010493516921997, "learning_rate": 3.830610488873765e-06, "loss": 1.2005, "step": 25529 }, { "epoch": 0.9142836678783104, "grad_norm": 1.4514111280441284, "learning_rate": 3.827431549258764e-06, "loss": 1.6173, "step": 25530 }, { "epoch": 0.9143194800078787, "grad_norm": 1.4556541442871094, "learning_rate": 3.824253903521602e-06, "loss": 1.3869, "step": 25531 }, { "epoch": 0.9143552921374469, "grad_norm": 1.7731586694717407, "learning_rate": 3.821077551705065e-06, "loss": 1.557, "step": 25532 }, { "epoch": 0.9143911042670152, "grad_norm": 1.5040415525436401, "learning_rate": 3.817902493851877e-06, "loss": 1.1799, "step": 25533 }, { "epoch": 0.9144269163965836, "grad_norm": 1.508422613143921, "learning_rate": 3.814728730004724e-06, "loss": 1.3727, "step": 25534 }, { "epoch": 0.9144627285261518, "grad_norm": 1.460686206817627, "learning_rate": 3.811556260206328e-06, "loss": 1.3637, "step": 25535 }, { "epoch": 0.9144985406557201, "grad_norm": 2.1639134883880615, "learning_rate": 3.808385084499366e-06, "loss": 1.4715, "step": 25536 }, { "epoch": 0.9145343527852884, "grad_norm": 1.949466586112976, "learning_rate": 3.8052152029265154e-06, "loss": 1.3316, "step": 25537 }, { "epoch": 0.9145701649148567, "grad_norm": 1.9992917776107788, "learning_rate": 3.8020466155304078e-06, "loss": 1.3247, "step": 25538 }, { "epoch": 0.9146059770444249, "grad_norm": 1.9052329063415527, "learning_rate": 3.798879322353666e-06, "loss": 1.5343, "step": 25539 }, { "epoch": 0.9146417891739932, "grad_norm": 1.6159497499465942, "learning_rate": 3.7957133234389207e-06, "loss": 1.5618, "step": 25540 }, { "epoch": 0.9146776013035616, "grad_norm": 1.5084832906723022, "learning_rate": 3.7925486188287727e-06, "loss": 1.4374, "step": 25541 }, { "epoch": 0.9147134134331298, "grad_norm": 1.6207736730575562, "learning_rate": 3.7893852085657657e-06, "loss": 1.5015, "step": 25542 }, { "epoch": 0.9147492255626981, "grad_norm": 1.9216586351394653, "learning_rate": 3.786223092692476e-06, "loss": 1.6365, "step": 25543 }, { "epoch": 0.9147850376922664, "grad_norm": 1.3661115169525146, "learning_rate": 3.7830622712514696e-06, "loss": 1.3516, "step": 25544 }, { "epoch": 0.9148208498218346, "grad_norm": 1.7824134826660156, "learning_rate": 3.779902744285224e-06, "loss": 1.0715, "step": 25545 }, { "epoch": 0.9148566619514029, "grad_norm": 2.196190357208252, "learning_rate": 3.7767445118362832e-06, "loss": 1.1834, "step": 25546 }, { "epoch": 0.9148924740809712, "grad_norm": 1.838174819946289, "learning_rate": 3.7735875739471237e-06, "loss": 1.417, "step": 25547 }, { "epoch": 0.9149282862105395, "grad_norm": 1.4311494827270508, "learning_rate": 3.770431930660223e-06, "loss": 1.3594, "step": 25548 }, { "epoch": 0.9149640983401078, "grad_norm": 1.6186925172805786, "learning_rate": 3.767277582018036e-06, "loss": 1.2366, "step": 25549 }, { "epoch": 0.9149999104696761, "grad_norm": 1.7173895835876465, "learning_rate": 3.7641245280629842e-06, "loss": 1.3939, "step": 25550 }, { "epoch": 0.9150357225992444, "grad_norm": 2.2020442485809326, "learning_rate": 3.760972768837523e-06, "loss": 1.4908, "step": 25551 }, { "epoch": 0.9150715347288126, "grad_norm": 1.694572925567627, "learning_rate": 3.757822304384018e-06, "loss": 1.3844, "step": 25552 }, { "epoch": 0.9151073468583809, "grad_norm": 1.406884789466858, "learning_rate": 3.7546731347448685e-06, "loss": 1.3992, "step": 25553 }, { "epoch": 0.9151431589879492, "grad_norm": 1.7870287895202637, "learning_rate": 3.7515252599624516e-06, "loss": 1.3392, "step": 25554 }, { "epoch": 0.9151789711175174, "grad_norm": 1.6366087198257446, "learning_rate": 3.748378680079112e-06, "loss": 1.3854, "step": 25555 }, { "epoch": 0.9152147832470858, "grad_norm": 1.4431771039962769, "learning_rate": 3.745233395137182e-06, "loss": 1.4213, "step": 25556 }, { "epoch": 0.9152505953766541, "grad_norm": 1.887716293334961, "learning_rate": 3.7420894051789723e-06, "loss": 1.394, "step": 25557 }, { "epoch": 0.9152864075062224, "grad_norm": 1.6417644023895264, "learning_rate": 3.7389467102467823e-06, "loss": 1.3629, "step": 25558 }, { "epoch": 0.9153222196357906, "grad_norm": 1.8466694355010986, "learning_rate": 3.7358053103829117e-06, "loss": 1.4253, "step": 25559 }, { "epoch": 0.9153580317653589, "grad_norm": 1.1880935430526733, "learning_rate": 3.732665205629593e-06, "loss": 1.306, "step": 25560 }, { "epoch": 0.9153938438949272, "grad_norm": 1.6200544834136963, "learning_rate": 3.7295263960290927e-06, "loss": 1.3328, "step": 25561 }, { "epoch": 0.9154296560244954, "grad_norm": 1.4610018730163574, "learning_rate": 3.7263888816236435e-06, "loss": 0.9073, "step": 25562 }, { "epoch": 0.9154654681540638, "grad_norm": 1.679741621017456, "learning_rate": 3.7232526624554344e-06, "loss": 1.6995, "step": 25563 }, { "epoch": 0.9155012802836321, "grad_norm": 1.569359540939331, "learning_rate": 3.720117738566675e-06, "loss": 1.2822, "step": 25564 }, { "epoch": 0.9155370924132004, "grad_norm": 2.0695765018463135, "learning_rate": 3.7169841099995438e-06, "loss": 1.6033, "step": 25565 }, { "epoch": 0.9155729045427686, "grad_norm": 1.6272145509719849, "learning_rate": 3.7138517767961954e-06, "loss": 1.2955, "step": 25566 }, { "epoch": 0.9156087166723369, "grad_norm": 1.9181761741638184, "learning_rate": 3.710720738998774e-06, "loss": 1.4108, "step": 25567 }, { "epoch": 0.9156445288019052, "grad_norm": 2.273002862930298, "learning_rate": 3.7075909966493903e-06, "loss": 1.6432, "step": 25568 }, { "epoch": 0.9156803409314734, "grad_norm": 1.7246898412704468, "learning_rate": 3.7044625497901774e-06, "loss": 1.2398, "step": 25569 }, { "epoch": 0.9157161530610418, "grad_norm": 1.4257093667984009, "learning_rate": 3.7013353984631906e-06, "loss": 1.7232, "step": 25570 }, { "epoch": 0.9157519651906101, "grad_norm": 1.7329950332641602, "learning_rate": 3.698209542710529e-06, "loss": 1.4783, "step": 25571 }, { "epoch": 0.9157877773201784, "grad_norm": 1.9357683658599854, "learning_rate": 3.6950849825742375e-06, "loss": 1.4423, "step": 25572 }, { "epoch": 0.9158235894497466, "grad_norm": 2.051203489303589, "learning_rate": 3.6919617180963595e-06, "loss": 1.6041, "step": 25573 }, { "epoch": 0.9158594015793149, "grad_norm": 1.6284784078598022, "learning_rate": 3.6888397493188954e-06, "loss": 1.5151, "step": 25574 }, { "epoch": 0.9158952137088832, "grad_norm": 1.5720276832580566, "learning_rate": 3.685719076283867e-06, "loss": 1.398, "step": 25575 }, { "epoch": 0.9159310258384514, "grad_norm": 1.6514610052108765, "learning_rate": 3.68259969903324e-06, "loss": 1.3815, "step": 25576 }, { "epoch": 0.9159668379680198, "grad_norm": 1.2488956451416016, "learning_rate": 3.6794816176090152e-06, "loss": 1.3639, "step": 25577 }, { "epoch": 0.9160026500975881, "grad_norm": 2.5675435066223145, "learning_rate": 3.676364832053103e-06, "loss": 1.2767, "step": 25578 }, { "epoch": 0.9160384622271563, "grad_norm": 1.3877888917922974, "learning_rate": 3.6732493424074587e-06, "loss": 1.2629, "step": 25579 }, { "epoch": 0.9160742743567246, "grad_norm": 1.5578382015228271, "learning_rate": 3.6701351487140046e-06, "loss": 1.5932, "step": 25580 }, { "epoch": 0.9161100864862929, "grad_norm": 2.35837459564209, "learning_rate": 3.667022251014607e-06, "loss": 1.3385, "step": 25581 }, { "epoch": 0.9161458986158612, "grad_norm": 1.7006118297576904, "learning_rate": 3.6639106493511766e-06, "loss": 1.262, "step": 25582 }, { "epoch": 0.9161817107454294, "grad_norm": 1.6541268825531006, "learning_rate": 3.660800343765547e-06, "loss": 1.4949, "step": 25583 }, { "epoch": 0.9162175228749978, "grad_norm": 1.5310859680175781, "learning_rate": 3.657691334299607e-06, "loss": 1.6051, "step": 25584 }, { "epoch": 0.9162533350045661, "grad_norm": 1.7938069105148315, "learning_rate": 3.6545836209951333e-06, "loss": 1.4242, "step": 25585 }, { "epoch": 0.9162891471341343, "grad_norm": 1.6601662635803223, "learning_rate": 3.6514772038939714e-06, "loss": 1.4616, "step": 25586 }, { "epoch": 0.9163249592637026, "grad_norm": 2.1517858505249023, "learning_rate": 3.64837208303791e-06, "loss": 1.5088, "step": 25587 }, { "epoch": 0.9163607713932709, "grad_norm": 1.3455365896224976, "learning_rate": 3.6452682584687035e-06, "loss": 1.12, "step": 25588 }, { "epoch": 0.9163965835228391, "grad_norm": 1.8301458358764648, "learning_rate": 3.642165730228131e-06, "loss": 1.3795, "step": 25589 }, { "epoch": 0.9164323956524074, "grad_norm": 1.7565302848815918, "learning_rate": 3.6390644983579135e-06, "loss": 1.4932, "step": 25590 }, { "epoch": 0.9164682077819758, "grad_norm": 1.7920565605163574, "learning_rate": 3.6359645628998073e-06, "loss": 1.5721, "step": 25591 }, { "epoch": 0.9165040199115441, "grad_norm": 1.578942894935608, "learning_rate": 3.6328659238954897e-06, "loss": 1.3415, "step": 25592 }, { "epoch": 0.9165398320411123, "grad_norm": 1.4769006967544556, "learning_rate": 3.62976858138665e-06, "loss": 1.2929, "step": 25593 }, { "epoch": 0.9165756441706806, "grad_norm": 1.8735318183898926, "learning_rate": 3.6266725354149656e-06, "loss": 1.7607, "step": 25594 }, { "epoch": 0.9166114563002489, "grad_norm": 1.9881622791290283, "learning_rate": 3.6235777860221033e-06, "loss": 1.5311, "step": 25595 }, { "epoch": 0.9166472684298171, "grad_norm": 1.5557109117507935, "learning_rate": 3.620484333249674e-06, "loss": 1.3811, "step": 25596 }, { "epoch": 0.9166830805593854, "grad_norm": 1.9754571914672852, "learning_rate": 3.6173921771393003e-06, "loss": 1.4716, "step": 25597 }, { "epoch": 0.9167188926889538, "grad_norm": 1.7173326015472412, "learning_rate": 3.6143013177326046e-06, "loss": 1.6932, "step": 25598 }, { "epoch": 0.9167547048185221, "grad_norm": 1.4891917705535889, "learning_rate": 3.611211755071142e-06, "loss": 1.4409, "step": 25599 }, { "epoch": 0.9167905169480903, "grad_norm": 2.2099905014038086, "learning_rate": 3.608123489196502e-06, "loss": 1.2602, "step": 25600 }, { "epoch": 0.9168263290776586, "grad_norm": 1.5880131721496582, "learning_rate": 3.605036520150218e-06, "loss": 1.3322, "step": 25601 }, { "epoch": 0.9168621412072269, "grad_norm": 1.5003440380096436, "learning_rate": 3.601950847973845e-06, "loss": 1.5016, "step": 25602 }, { "epoch": 0.9168979533367951, "grad_norm": 1.5447492599487305, "learning_rate": 3.598866472708862e-06, "loss": 1.4572, "step": 25603 }, { "epoch": 0.9169337654663634, "grad_norm": 1.4146647453308105, "learning_rate": 3.595783394396779e-06, "loss": 1.418, "step": 25604 }, { "epoch": 0.9169695775959318, "grad_norm": 1.7357875108718872, "learning_rate": 3.592701613079097e-06, "loss": 1.1391, "step": 25605 }, { "epoch": 0.9170053897255, "grad_norm": 1.6281183958053589, "learning_rate": 3.5896211287972383e-06, "loss": 1.4511, "step": 25606 }, { "epoch": 0.9170412018550683, "grad_norm": 1.7715595960617065, "learning_rate": 3.5865419415926803e-06, "loss": 1.5196, "step": 25607 }, { "epoch": 0.9170770139846366, "grad_norm": 1.6238850355148315, "learning_rate": 3.583464051506813e-06, "loss": 1.3839, "step": 25608 }, { "epoch": 0.9171128261142049, "grad_norm": 1.6311407089233398, "learning_rate": 3.5803874585811024e-06, "loss": 1.4173, "step": 25609 }, { "epoch": 0.9171486382437731, "grad_norm": 1.4624392986297607, "learning_rate": 3.577312162856883e-06, "loss": 1.3834, "step": 25610 }, { "epoch": 0.9171844503733414, "grad_norm": 1.55482816696167, "learning_rate": 3.574238164375554e-06, "loss": 1.433, "step": 25611 }, { "epoch": 0.9172202625029098, "grad_norm": 1.4993577003479004, "learning_rate": 3.571165463178472e-06, "loss": 1.4662, "step": 25612 }, { "epoch": 0.917256074632478, "grad_norm": 1.3473643064498901, "learning_rate": 3.568094059306981e-06, "loss": 1.401, "step": 25613 }, { "epoch": 0.9172918867620463, "grad_norm": 2.217761516571045, "learning_rate": 3.5650239528024043e-06, "loss": 1.266, "step": 25614 }, { "epoch": 0.9173276988916146, "grad_norm": 1.6229770183563232, "learning_rate": 3.5619551437060083e-06, "loss": 1.5986, "step": 25615 }, { "epoch": 0.9173635110211829, "grad_norm": 1.9005887508392334, "learning_rate": 3.558887632059138e-06, "loss": 0.9255, "step": 25616 }, { "epoch": 0.9173993231507511, "grad_norm": 1.1790728569030762, "learning_rate": 3.555821417903027e-06, "loss": 1.1397, "step": 25617 }, { "epoch": 0.9174351352803194, "grad_norm": 1.6984502077102661, "learning_rate": 3.552756501278931e-06, "loss": 1.413, "step": 25618 }, { "epoch": 0.9174709474098878, "grad_norm": 1.8751317262649536, "learning_rate": 3.549692882228084e-06, "loss": 1.6012, "step": 25619 }, { "epoch": 0.917506759539456, "grad_norm": 1.5878108739852905, "learning_rate": 3.5466305607917195e-06, "loss": 1.2097, "step": 25620 }, { "epoch": 0.9175425716690243, "grad_norm": 1.529499888420105, "learning_rate": 3.5435695370110154e-06, "loss": 1.5132, "step": 25621 }, { "epoch": 0.9175783837985926, "grad_norm": 1.7148410081863403, "learning_rate": 3.540509810927173e-06, "loss": 1.4484, "step": 25622 }, { "epoch": 0.9176141959281608, "grad_norm": 1.3747161626815796, "learning_rate": 3.537451382581336e-06, "loss": 1.1541, "step": 25623 }, { "epoch": 0.9176500080577291, "grad_norm": 1.7710306644439697, "learning_rate": 3.534394252014661e-06, "loss": 1.4204, "step": 25624 }, { "epoch": 0.9176858201872974, "grad_norm": 1.4869321584701538, "learning_rate": 3.531338419268293e-06, "loss": 1.6908, "step": 25625 }, { "epoch": 0.9177216323168658, "grad_norm": 1.8567776679992676, "learning_rate": 3.52828388438331e-06, "loss": 1.131, "step": 25626 }, { "epoch": 0.917757444446434, "grad_norm": 1.6301764249801636, "learning_rate": 3.5252306474008457e-06, "loss": 1.3344, "step": 25627 }, { "epoch": 0.9177932565760023, "grad_norm": 1.6216933727264404, "learning_rate": 3.522178708361956e-06, "loss": 1.4663, "step": 25628 }, { "epoch": 0.9178290687055706, "grad_norm": 1.4409526586532593, "learning_rate": 3.5191280673077086e-06, "loss": 1.3619, "step": 25629 }, { "epoch": 0.9178648808351388, "grad_norm": 2.590237855911255, "learning_rate": 3.516078724279137e-06, "loss": 1.1399, "step": 25630 }, { "epoch": 0.9179006929647071, "grad_norm": 2.0231406688690186, "learning_rate": 3.513030679317264e-06, "loss": 1.4057, "step": 25631 }, { "epoch": 0.9179365050942754, "grad_norm": 1.7201601266860962, "learning_rate": 3.5099839324631233e-06, "loss": 1.4519, "step": 25632 }, { "epoch": 0.9179723172238438, "grad_norm": 1.846491813659668, "learning_rate": 3.506938483757671e-06, "loss": 1.2908, "step": 25633 }, { "epoch": 0.918008129353412, "grad_norm": 2.1384713649749756, "learning_rate": 3.503894333241886e-06, "loss": 1.3344, "step": 25634 }, { "epoch": 0.9180439414829803, "grad_norm": 1.8844846487045288, "learning_rate": 3.500851480956746e-06, "loss": 1.228, "step": 25635 }, { "epoch": 0.9180797536125486, "grad_norm": 2.2950620651245117, "learning_rate": 3.497809926943174e-06, "loss": 1.5764, "step": 25636 }, { "epoch": 0.9181155657421168, "grad_norm": 1.582805871963501, "learning_rate": 3.4947696712420708e-06, "loss": 1.4444, "step": 25637 }, { "epoch": 0.9181513778716851, "grad_norm": 1.5671573877334595, "learning_rate": 3.491730713894381e-06, "loss": 1.3715, "step": 25638 }, { "epoch": 0.9181871900012534, "grad_norm": 1.5163581371307373, "learning_rate": 3.4886930549409724e-06, "loss": 1.3593, "step": 25639 }, { "epoch": 0.9182230021308218, "grad_norm": 1.5615180730819702, "learning_rate": 3.485656694422701e-06, "loss": 1.1561, "step": 25640 }, { "epoch": 0.91825881426039, "grad_norm": 1.5961110591888428, "learning_rate": 3.482621632380412e-06, "loss": 1.4103, "step": 25641 }, { "epoch": 0.9182946263899583, "grad_norm": 1.2680072784423828, "learning_rate": 3.479587868854961e-06, "loss": 1.6213, "step": 25642 }, { "epoch": 0.9183304385195266, "grad_norm": 1.8912404775619507, "learning_rate": 3.4765554038871607e-06, "loss": 1.2256, "step": 25643 }, { "epoch": 0.9183662506490948, "grad_norm": 1.5284357070922852, "learning_rate": 3.4735242375177777e-06, "loss": 1.577, "step": 25644 }, { "epoch": 0.9184020627786631, "grad_norm": 1.3570201396942139, "learning_rate": 3.470494369787636e-06, "loss": 1.508, "step": 25645 }, { "epoch": 0.9184378749082314, "grad_norm": 2.326610803604126, "learning_rate": 3.4674658007374683e-06, "loss": 1.3932, "step": 25646 }, { "epoch": 0.9184736870377997, "grad_norm": 1.5897072553634644, "learning_rate": 3.464438530408043e-06, "loss": 1.6262, "step": 25647 }, { "epoch": 0.918509499167368, "grad_norm": 1.651442289352417, "learning_rate": 3.46141255884006e-06, "loss": 1.0848, "step": 25648 }, { "epoch": 0.9185453112969363, "grad_norm": 1.5025795698165894, "learning_rate": 3.4583878860742434e-06, "loss": 1.5907, "step": 25649 }, { "epoch": 0.9185811234265046, "grad_norm": 2.048462152481079, "learning_rate": 3.4553645121513046e-06, "loss": 1.8224, "step": 25650 }, { "epoch": 0.9186169355560728, "grad_norm": 1.4465118646621704, "learning_rate": 3.4523424371118885e-06, "loss": 1.4116, "step": 25651 }, { "epoch": 0.9186527476856411, "grad_norm": 2.2787973880767822, "learning_rate": 3.449321660996674e-06, "loss": 1.4564, "step": 25652 }, { "epoch": 0.9186885598152094, "grad_norm": 1.6836674213409424, "learning_rate": 3.446302183846295e-06, "loss": 1.1173, "step": 25653 }, { "epoch": 0.9187243719447777, "grad_norm": 1.779667615890503, "learning_rate": 3.443284005701375e-06, "loss": 1.5232, "step": 25654 }, { "epoch": 0.918760184074346, "grad_norm": 1.2427458763122559, "learning_rate": 3.4402671266025253e-06, "loss": 1.2979, "step": 25655 }, { "epoch": 0.9187959962039143, "grad_norm": 1.405266523361206, "learning_rate": 3.4372515465903145e-06, "loss": 1.5639, "step": 25656 }, { "epoch": 0.9188318083334825, "grad_norm": 1.3440146446228027, "learning_rate": 3.434237265705342e-06, "loss": 1.3727, "step": 25657 }, { "epoch": 0.9188676204630508, "grad_norm": 1.969915747642517, "learning_rate": 3.4312242839881325e-06, "loss": 1.0968, "step": 25658 }, { "epoch": 0.9189034325926191, "grad_norm": 1.9113439321517944, "learning_rate": 3.4282126014792414e-06, "loss": 1.6633, "step": 25659 }, { "epoch": 0.9189392447221874, "grad_norm": 1.5810271501541138, "learning_rate": 3.4252022182191813e-06, "loss": 1.4188, "step": 25660 }, { "epoch": 0.9189750568517557, "grad_norm": 2.1815831661224365, "learning_rate": 3.4221931342484525e-06, "loss": 1.7841, "step": 25661 }, { "epoch": 0.919010868981324, "grad_norm": 1.472642183303833, "learning_rate": 3.4191853496075343e-06, "loss": 1.4584, "step": 25662 }, { "epoch": 0.9190466811108923, "grad_norm": 1.7633564472198486, "learning_rate": 3.4161788643369052e-06, "loss": 1.2982, "step": 25663 }, { "epoch": 0.9190824932404605, "grad_norm": 2.459702730178833, "learning_rate": 3.4131736784769996e-06, "loss": 1.5756, "step": 25664 }, { "epoch": 0.9191183053700288, "grad_norm": 1.4552972316741943, "learning_rate": 3.410169792068263e-06, "loss": 1.2331, "step": 25665 }, { "epoch": 0.9191541174995971, "grad_norm": 1.558521032333374, "learning_rate": 3.407167205151085e-06, "loss": 1.194, "step": 25666 }, { "epoch": 0.9191899296291653, "grad_norm": 1.9817105531692505, "learning_rate": 3.404165917765889e-06, "loss": 1.4798, "step": 25667 }, { "epoch": 0.9192257417587337, "grad_norm": 1.6500427722930908, "learning_rate": 3.401165929953043e-06, "loss": 1.4384, "step": 25668 }, { "epoch": 0.919261553888302, "grad_norm": 1.5914006233215332, "learning_rate": 3.398167241752892e-06, "loss": 1.521, "step": 25669 }, { "epoch": 0.9192973660178703, "grad_norm": 1.3515881299972534, "learning_rate": 3.395169853205793e-06, "loss": 1.5902, "step": 25670 }, { "epoch": 0.9193331781474385, "grad_norm": 1.7092448472976685, "learning_rate": 3.3921737643520803e-06, "loss": 1.537, "step": 25671 }, { "epoch": 0.9193689902770068, "grad_norm": 1.3170454502105713, "learning_rate": 3.3891789752320656e-06, "loss": 1.3537, "step": 25672 }, { "epoch": 0.9194048024065751, "grad_norm": 2.0973784923553467, "learning_rate": 3.3861854858860177e-06, "loss": 1.5353, "step": 25673 }, { "epoch": 0.9194406145361433, "grad_norm": 1.5266790390014648, "learning_rate": 3.3831932963542147e-06, "loss": 1.3477, "step": 25674 }, { "epoch": 0.9194764266657117, "grad_norm": 1.6230286359786987, "learning_rate": 3.3802024066769355e-06, "loss": 1.3931, "step": 25675 }, { "epoch": 0.91951223879528, "grad_norm": 2.351484775543213, "learning_rate": 3.3772128168943816e-06, "loss": 1.5749, "step": 25676 }, { "epoch": 0.9195480509248483, "grad_norm": 2.194748878479004, "learning_rate": 3.37422452704681e-06, "loss": 1.4503, "step": 25677 }, { "epoch": 0.9195838630544165, "grad_norm": 1.614140510559082, "learning_rate": 3.3712375371743987e-06, "loss": 1.5758, "step": 25678 }, { "epoch": 0.9196196751839848, "grad_norm": 2.11362624168396, "learning_rate": 3.3682518473173607e-06, "loss": 1.3219, "step": 25679 }, { "epoch": 0.9196554873135531, "grad_norm": 2.002917766571045, "learning_rate": 3.3652674575158306e-06, "loss": 1.2815, "step": 25680 }, { "epoch": 0.9196912994431213, "grad_norm": 1.6976182460784912, "learning_rate": 3.362284367809976e-06, "loss": 1.3635, "step": 25681 }, { "epoch": 0.9197271115726897, "grad_norm": 1.6870442628860474, "learning_rate": 3.3593025782399424e-06, "loss": 1.2197, "step": 25682 }, { "epoch": 0.919762923702258, "grad_norm": 2.186999797821045, "learning_rate": 3.3563220888458425e-06, "loss": 1.3623, "step": 25683 }, { "epoch": 0.9197987358318263, "grad_norm": 1.727008581161499, "learning_rate": 3.353342899667755e-06, "loss": 1.2574, "step": 25684 }, { "epoch": 0.9198345479613945, "grad_norm": 1.7052570581436157, "learning_rate": 3.3503650107457706e-06, "loss": 1.7394, "step": 25685 }, { "epoch": 0.9198703600909628, "grad_norm": 1.6224033832550049, "learning_rate": 3.347388422119968e-06, "loss": 1.3684, "step": 25686 }, { "epoch": 0.9199061722205311, "grad_norm": 1.8566479682922363, "learning_rate": 3.3444131338303708e-06, "loss": 1.127, "step": 25687 }, { "epoch": 0.9199419843500993, "grad_norm": 2.9639549255371094, "learning_rate": 3.3414391459170134e-06, "loss": 1.5899, "step": 25688 }, { "epoch": 0.9199777964796677, "grad_norm": 1.5856989622116089, "learning_rate": 3.33846645841992e-06, "loss": 1.2898, "step": 25689 }, { "epoch": 0.920013608609236, "grad_norm": 1.1478488445281982, "learning_rate": 3.33549507137908e-06, "loss": 1.3804, "step": 25690 }, { "epoch": 0.9200494207388042, "grad_norm": 1.6433991193771362, "learning_rate": 3.332524984834462e-06, "loss": 1.6188, "step": 25691 }, { "epoch": 0.9200852328683725, "grad_norm": 1.6301097869873047, "learning_rate": 3.3295561988260227e-06, "loss": 1.6194, "step": 25692 }, { "epoch": 0.9201210449979408, "grad_norm": 3.6255156993865967, "learning_rate": 3.326588713393719e-06, "loss": 1.531, "step": 25693 }, { "epoch": 0.920156857127509, "grad_norm": 1.6354080438613892, "learning_rate": 3.3236225285774637e-06, "loss": 1.4595, "step": 25694 }, { "epoch": 0.9201926692570773, "grad_norm": 1.5773730278015137, "learning_rate": 3.3206576444171577e-06, "loss": 1.2759, "step": 25695 }, { "epoch": 0.9202284813866457, "grad_norm": 1.889205813407898, "learning_rate": 3.317694060952692e-06, "loss": 1.1222, "step": 25696 }, { "epoch": 0.920264293516214, "grad_norm": 1.4370874166488647, "learning_rate": 3.314731778223956e-06, "loss": 1.443, "step": 25697 }, { "epoch": 0.9203001056457822, "grad_norm": 1.4879106283187866, "learning_rate": 3.3117707962707746e-06, "loss": 1.5812, "step": 25698 }, { "epoch": 0.9203359177753505, "grad_norm": 1.411486268043518, "learning_rate": 3.308811115133004e-06, "loss": 1.5091, "step": 25699 }, { "epoch": 0.9203717299049188, "grad_norm": 2.297484874725342, "learning_rate": 3.3058527348504455e-06, "loss": 1.1828, "step": 25700 }, { "epoch": 0.920407542034487, "grad_norm": 1.640698790550232, "learning_rate": 3.302895655462934e-06, "loss": 1.6501, "step": 25701 }, { "epoch": 0.9204433541640553, "grad_norm": 1.9217278957366943, "learning_rate": 3.2999398770102276e-06, "loss": 1.2598, "step": 25702 }, { "epoch": 0.9204791662936237, "grad_norm": 1.5686508417129517, "learning_rate": 3.296985399532071e-06, "loss": 1.4808, "step": 25703 }, { "epoch": 0.920514978423192, "grad_norm": 1.4743748903274536, "learning_rate": 3.2940322230682664e-06, "loss": 1.2292, "step": 25704 }, { "epoch": 0.9205507905527602, "grad_norm": 1.3531376123428345, "learning_rate": 3.291080347658504e-06, "loss": 1.2794, "step": 25705 }, { "epoch": 0.9205866026823285, "grad_norm": 1.6078829765319824, "learning_rate": 3.2881297733425188e-06, "loss": 1.4727, "step": 25706 }, { "epoch": 0.9206224148118968, "grad_norm": 1.4829717874526978, "learning_rate": 3.285180500159979e-06, "loss": 1.5516, "step": 25707 }, { "epoch": 0.920658226941465, "grad_norm": 1.7333753108978271, "learning_rate": 3.2822325281505973e-06, "loss": 1.3617, "step": 25708 }, { "epoch": 0.9206940390710333, "grad_norm": 1.9004656076431274, "learning_rate": 3.27928585735402e-06, "loss": 1.8051, "step": 25709 }, { "epoch": 0.9207298512006017, "grad_norm": 1.6876628398895264, "learning_rate": 3.2763404878098815e-06, "loss": 1.4722, "step": 25710 }, { "epoch": 0.92076566333017, "grad_norm": 1.6359210014343262, "learning_rate": 3.273396419557839e-06, "loss": 1.2267, "step": 25711 }, { "epoch": 0.9208014754597382, "grad_norm": 1.5168014764785767, "learning_rate": 3.2704536526374506e-06, "loss": 1.3787, "step": 25712 }, { "epoch": 0.9208372875893065, "grad_norm": 2.176189661026001, "learning_rate": 3.267512187088362e-06, "loss": 1.6466, "step": 25713 }, { "epoch": 0.9208730997188748, "grad_norm": 1.7817593812942505, "learning_rate": 3.2645720229500965e-06, "loss": 1.7843, "step": 25714 }, { "epoch": 0.920908911848443, "grad_norm": 1.368715524673462, "learning_rate": 3.2616331602622565e-06, "loss": 1.5336, "step": 25715 }, { "epoch": 0.9209447239780113, "grad_norm": 1.853217363357544, "learning_rate": 3.2586955990643432e-06, "loss": 1.4965, "step": 25716 }, { "epoch": 0.9209805361075797, "grad_norm": 2.11053729057312, "learning_rate": 3.255759339395903e-06, "loss": 1.5349, "step": 25717 }, { "epoch": 0.921016348237148, "grad_norm": 1.5958489179611206, "learning_rate": 3.2528243812964156e-06, "loss": 1.5431, "step": 25718 }, { "epoch": 0.9210521603667162, "grad_norm": 1.3022187948226929, "learning_rate": 3.2498907248054045e-06, "loss": 1.6714, "step": 25719 }, { "epoch": 0.9210879724962845, "grad_norm": 1.4858121871948242, "learning_rate": 3.2469583699623053e-06, "loss": 1.5852, "step": 25720 }, { "epoch": 0.9211237846258528, "grad_norm": 2.0671451091766357, "learning_rate": 3.2440273168065636e-06, "loss": 1.0612, "step": 25721 }, { "epoch": 0.921159596755421, "grad_norm": 1.670335054397583, "learning_rate": 3.241097565377649e-06, "loss": 1.5164, "step": 25722 }, { "epoch": 0.9211954088849893, "grad_norm": 1.7597389221191406, "learning_rate": 3.2381691157149395e-06, "loss": 1.4661, "step": 25723 }, { "epoch": 0.9212312210145577, "grad_norm": 1.881030797958374, "learning_rate": 3.2352419678578714e-06, "loss": 1.3387, "step": 25724 }, { "epoch": 0.921267033144126, "grad_norm": 1.9583910703659058, "learning_rate": 3.2323161218457796e-06, "loss": 1.5953, "step": 25725 }, { "epoch": 0.9213028452736942, "grad_norm": 1.6051225662231445, "learning_rate": 3.229391577718066e-06, "loss": 1.2554, "step": 25726 }, { "epoch": 0.9213386574032625, "grad_norm": 1.7156764268875122, "learning_rate": 3.226468335514077e-06, "loss": 1.4546, "step": 25727 }, { "epoch": 0.9213744695328308, "grad_norm": 1.7657434940338135, "learning_rate": 3.223546395273114e-06, "loss": 1.3154, "step": 25728 }, { "epoch": 0.921410281662399, "grad_norm": 1.699459433555603, "learning_rate": 3.220625757034501e-06, "loss": 1.4964, "step": 25729 }, { "epoch": 0.9214460937919673, "grad_norm": 1.7240272760391235, "learning_rate": 3.2177064208375298e-06, "loss": 1.6393, "step": 25730 }, { "epoch": 0.9214819059215357, "grad_norm": 1.9153105020523071, "learning_rate": 3.21478838672149e-06, "loss": 1.7071, "step": 25731 }, { "epoch": 0.9215177180511039, "grad_norm": 2.9040825366973877, "learning_rate": 3.211871654725618e-06, "loss": 1.7588, "step": 25732 }, { "epoch": 0.9215535301806722, "grad_norm": 1.6486643552780151, "learning_rate": 3.208956224889159e-06, "loss": 1.3715, "step": 25733 }, { "epoch": 0.9215893423102405, "grad_norm": 1.6218197345733643, "learning_rate": 3.2060420972513494e-06, "loss": 1.3616, "step": 25734 }, { "epoch": 0.9216251544398087, "grad_norm": 1.4166431427001953, "learning_rate": 3.203129271851402e-06, "loss": 1.2931, "step": 25735 }, { "epoch": 0.921660966569377, "grad_norm": 1.4935784339904785, "learning_rate": 3.2002177487284736e-06, "loss": 1.3154, "step": 25736 }, { "epoch": 0.9216967786989453, "grad_norm": 1.6623226404190063, "learning_rate": 3.197307527921756e-06, "loss": 1.4607, "step": 25737 }, { "epoch": 0.9217325908285137, "grad_norm": 1.6059545278549194, "learning_rate": 3.194398609470406e-06, "loss": 1.4489, "step": 25738 }, { "epoch": 0.9217684029580819, "grad_norm": 1.5388230085372925, "learning_rate": 3.1914909934135483e-06, "loss": 1.2268, "step": 25739 }, { "epoch": 0.9218042150876502, "grad_norm": 1.3560782670974731, "learning_rate": 3.1885846797902964e-06, "loss": 1.3344, "step": 25740 }, { "epoch": 0.9218400272172185, "grad_norm": 1.4934988021850586, "learning_rate": 3.185679668639763e-06, "loss": 1.4104, "step": 25741 }, { "epoch": 0.9218758393467867, "grad_norm": 1.6801812648773193, "learning_rate": 3.1827759600010498e-06, "loss": 1.6805, "step": 25742 }, { "epoch": 0.921911651476355, "grad_norm": 1.7978261709213257, "learning_rate": 3.179873553913171e-06, "loss": 1.5292, "step": 25743 }, { "epoch": 0.9219474636059233, "grad_norm": 1.6324962377548218, "learning_rate": 3.1769724504152164e-06, "loss": 1.1737, "step": 25744 }, { "epoch": 0.9219832757354917, "grad_norm": 1.4262455701828003, "learning_rate": 3.1740726495462223e-06, "loss": 1.3579, "step": 25745 }, { "epoch": 0.9220190878650599, "grad_norm": 1.422607183456421, "learning_rate": 3.1711741513451576e-06, "loss": 1.2943, "step": 25746 }, { "epoch": 0.9220548999946282, "grad_norm": 1.3137279748916626, "learning_rate": 3.1682769558510574e-06, "loss": 1.6224, "step": 25747 }, { "epoch": 0.9220907121241965, "grad_norm": 1.8328073024749756, "learning_rate": 3.165381063102879e-06, "loss": 1.3839, "step": 25748 }, { "epoch": 0.9221265242537647, "grad_norm": 1.4281005859375, "learning_rate": 3.162486473139603e-06, "loss": 1.2144, "step": 25749 }, { "epoch": 0.922162336383333, "grad_norm": 1.3970149755477905, "learning_rate": 3.1595931860001536e-06, "loss": 1.2464, "step": 25750 }, { "epoch": 0.9221981485129013, "grad_norm": 1.5348528623580933, "learning_rate": 3.1567012017234553e-06, "loss": 1.4653, "step": 25751 }, { "epoch": 0.9222339606424697, "grad_norm": 2.080173969268799, "learning_rate": 3.1538105203484323e-06, "loss": 1.4834, "step": 25752 }, { "epoch": 0.9222697727720379, "grad_norm": 1.4271656274795532, "learning_rate": 3.150921141913965e-06, "loss": 1.6015, "step": 25753 }, { "epoch": 0.9223055849016062, "grad_norm": 1.562336802482605, "learning_rate": 3.148033066458933e-06, "loss": 1.8834, "step": 25754 }, { "epoch": 0.9223413970311745, "grad_norm": 1.3763641119003296, "learning_rate": 3.145146294022172e-06, "loss": 1.3171, "step": 25755 }, { "epoch": 0.9223772091607427, "grad_norm": 1.3976991176605225, "learning_rate": 3.1422608246425513e-06, "loss": 1.185, "step": 25756 }, { "epoch": 0.922413021290311, "grad_norm": 1.4210984706878662, "learning_rate": 3.1393766583588614e-06, "loss": 1.6143, "step": 25757 }, { "epoch": 0.9224488334198793, "grad_norm": 1.7500970363616943, "learning_rate": 3.136493795209916e-06, "loss": 1.702, "step": 25758 }, { "epoch": 0.9224846455494476, "grad_norm": 2.02065372467041, "learning_rate": 3.1336122352345065e-06, "loss": 1.5751, "step": 25759 }, { "epoch": 0.9225204576790159, "grad_norm": 1.553234338760376, "learning_rate": 3.130731978471402e-06, "loss": 1.1833, "step": 25760 }, { "epoch": 0.9225562698085842, "grad_norm": 1.5323034524917603, "learning_rate": 3.1278530249593372e-06, "loss": 1.5451, "step": 25761 }, { "epoch": 0.9225920819381525, "grad_norm": 1.3176145553588867, "learning_rate": 3.124975374737049e-06, "loss": 1.055, "step": 25762 }, { "epoch": 0.9226278940677207, "grad_norm": 2.0297014713287354, "learning_rate": 3.1220990278432727e-06, "loss": 1.3966, "step": 25763 }, { "epoch": 0.922663706197289, "grad_norm": 1.19528067111969, "learning_rate": 3.119223984316677e-06, "loss": 1.4489, "step": 25764 }, { "epoch": 0.9226995183268573, "grad_norm": 1.6679394245147705, "learning_rate": 3.1163502441959647e-06, "loss": 1.1535, "step": 25765 }, { "epoch": 0.9227353304564256, "grad_norm": 1.6907655000686646, "learning_rate": 3.113477807519782e-06, "loss": 1.6175, "step": 25766 }, { "epoch": 0.9227711425859939, "grad_norm": 1.7465883493423462, "learning_rate": 3.110606674326788e-06, "loss": 1.6063, "step": 25767 }, { "epoch": 0.9228069547155622, "grad_norm": 1.6468842029571533, "learning_rate": 3.1077368446555956e-06, "loss": 1.378, "step": 25768 }, { "epoch": 0.9228427668451304, "grad_norm": 1.6969002485275269, "learning_rate": 3.104868318544818e-06, "loss": 1.4272, "step": 25769 }, { "epoch": 0.9228785789746987, "grad_norm": 1.4377319812774658, "learning_rate": 3.1020010960330583e-06, "loss": 1.6473, "step": 25770 }, { "epoch": 0.922914391104267, "grad_norm": 2.1896963119506836, "learning_rate": 3.0991351771588963e-06, "loss": 1.2011, "step": 25771 }, { "epoch": 0.9229502032338353, "grad_norm": 1.6838210821151733, "learning_rate": 3.0962705619608565e-06, "loss": 1.372, "step": 25772 }, { "epoch": 0.9229860153634036, "grad_norm": 1.592898964881897, "learning_rate": 3.093407250477509e-06, "loss": 1.4824, "step": 25773 }, { "epoch": 0.9230218274929719, "grad_norm": 1.7946579456329346, "learning_rate": 3.0905452427473667e-06, "loss": 1.4023, "step": 25774 }, { "epoch": 0.9230576396225402, "grad_norm": 2.0756218433380127, "learning_rate": 3.0876845388089327e-06, "loss": 1.4185, "step": 25775 }, { "epoch": 0.9230934517521084, "grad_norm": 1.2983440160751343, "learning_rate": 3.084825138700698e-06, "loss": 1.4131, "step": 25776 }, { "epoch": 0.9231292638816767, "grad_norm": 2.0300850868225098, "learning_rate": 3.08196704246112e-06, "loss": 1.5474, "step": 25777 }, { "epoch": 0.923165076011245, "grad_norm": 2.3360812664031982, "learning_rate": 3.0791102501286804e-06, "loss": 1.1895, "step": 25778 }, { "epoch": 0.9232008881408132, "grad_norm": 1.792782187461853, "learning_rate": 3.0762547617417703e-06, "loss": 1.308, "step": 25779 }, { "epoch": 0.9232367002703816, "grad_norm": 1.6354910135269165, "learning_rate": 3.0734005773388364e-06, "loss": 1.405, "step": 25780 }, { "epoch": 0.9232725123999499, "grad_norm": 1.6714788675308228, "learning_rate": 3.0705476969582813e-06, "loss": 1.2023, "step": 25781 }, { "epoch": 0.9233083245295182, "grad_norm": 1.793521523475647, "learning_rate": 3.0676961206384746e-06, "loss": 1.3668, "step": 25782 }, { "epoch": 0.9233441366590864, "grad_norm": 1.6857377290725708, "learning_rate": 3.0648458484177746e-06, "loss": 1.5258, "step": 25783 }, { "epoch": 0.9233799487886547, "grad_norm": 1.6202548742294312, "learning_rate": 3.061996880334539e-06, "loss": 1.2652, "step": 25784 }, { "epoch": 0.923415760918223, "grad_norm": 1.563083291053772, "learning_rate": 3.059149216427104e-06, "loss": 1.5697, "step": 25785 }, { "epoch": 0.9234515730477912, "grad_norm": 1.3261644840240479, "learning_rate": 3.0563028567337614e-06, "loss": 1.38, "step": 25786 }, { "epoch": 0.9234873851773596, "grad_norm": 1.2361618280410767, "learning_rate": 3.053457801292814e-06, "loss": 1.3723, "step": 25787 }, { "epoch": 0.9235231973069279, "grad_norm": 1.499376893043518, "learning_rate": 3.0506140501425417e-06, "loss": 1.4285, "step": 25788 }, { "epoch": 0.9235590094364962, "grad_norm": 1.281010389328003, "learning_rate": 3.0477716033212032e-06, "loss": 1.4848, "step": 25789 }, { "epoch": 0.9235948215660644, "grad_norm": 2.0112226009368896, "learning_rate": 3.044930460867046e-06, "loss": 1.4768, "step": 25790 }, { "epoch": 0.9236306336956327, "grad_norm": 1.588922142982483, "learning_rate": 3.042090622818272e-06, "loss": 1.2698, "step": 25791 }, { "epoch": 0.923666445825201, "grad_norm": 1.5995523929595947, "learning_rate": 3.039252089213118e-06, "loss": 1.3193, "step": 25792 }, { "epoch": 0.9237022579547692, "grad_norm": 2.97119140625, "learning_rate": 3.0364148600897423e-06, "loss": 1.5305, "step": 25793 }, { "epoch": 0.9237380700843376, "grad_norm": 2.8016178607940674, "learning_rate": 3.0335789354863362e-06, "loss": 1.5246, "step": 25794 }, { "epoch": 0.9237738822139059, "grad_norm": 1.5737606287002563, "learning_rate": 3.0307443154410365e-06, "loss": 1.42, "step": 25795 }, { "epoch": 0.9238096943434742, "grad_norm": 1.4103105068206787, "learning_rate": 3.027910999992012e-06, "loss": 1.4139, "step": 25796 }, { "epoch": 0.9238455064730424, "grad_norm": 1.6987563371658325, "learning_rate": 3.0250789891773433e-06, "loss": 1.6168, "step": 25797 }, { "epoch": 0.9238813186026107, "grad_norm": 1.6376010179519653, "learning_rate": 3.022248283035156e-06, "loss": 1.3091, "step": 25798 }, { "epoch": 0.923917130732179, "grad_norm": 1.1662794351577759, "learning_rate": 3.0194188816035305e-06, "loss": 1.2575, "step": 25799 }, { "epoch": 0.9239529428617472, "grad_norm": 1.8269308805465698, "learning_rate": 3.0165907849205254e-06, "loss": 1.2645, "step": 25800 }, { "epoch": 0.9239887549913156, "grad_norm": 1.7312307357788086, "learning_rate": 3.013763993024188e-06, "loss": 1.2861, "step": 25801 }, { "epoch": 0.9240245671208839, "grad_norm": 1.7468078136444092, "learning_rate": 3.010938505952543e-06, "loss": 1.4326, "step": 25802 }, { "epoch": 0.9240603792504521, "grad_norm": 1.40182363986969, "learning_rate": 3.008114323743627e-06, "loss": 1.2857, "step": 25803 }, { "epoch": 0.9240961913800204, "grad_norm": 1.6253925561904907, "learning_rate": 3.005291446435421e-06, "loss": 1.4106, "step": 25804 }, { "epoch": 0.9241320035095887, "grad_norm": 1.5819063186645508, "learning_rate": 3.002469874065894e-06, "loss": 1.2463, "step": 25805 }, { "epoch": 0.924167815639157, "grad_norm": 1.8966995477676392, "learning_rate": 2.999649606673027e-06, "loss": 1.4093, "step": 25806 }, { "epoch": 0.9242036277687252, "grad_norm": 1.4328267574310303, "learning_rate": 2.996830644294757e-06, "loss": 1.2878, "step": 25807 }, { "epoch": 0.9242394398982936, "grad_norm": 2.1859099864959717, "learning_rate": 2.994012986969008e-06, "loss": 1.3069, "step": 25808 }, { "epoch": 0.9242752520278619, "grad_norm": 1.6977571249008179, "learning_rate": 2.991196634733662e-06, "loss": 1.162, "step": 25809 }, { "epoch": 0.9243110641574301, "grad_norm": 1.3135548830032349, "learning_rate": 2.9883815876266653e-06, "loss": 1.3733, "step": 25810 }, { "epoch": 0.9243468762869984, "grad_norm": 1.7272981405258179, "learning_rate": 2.985567845685833e-06, "loss": 1.5297, "step": 25811 }, { "epoch": 0.9243826884165667, "grad_norm": 1.472185492515564, "learning_rate": 2.982755408949067e-06, "loss": 1.3625, "step": 25812 }, { "epoch": 0.924418500546135, "grad_norm": 1.6928966045379639, "learning_rate": 2.97994427745415e-06, "loss": 1.4356, "step": 25813 }, { "epoch": 0.9244543126757032, "grad_norm": 1.5978893041610718, "learning_rate": 2.977134451238972e-06, "loss": 1.322, "step": 25814 }, { "epoch": 0.9244901248052716, "grad_norm": 1.2723480463027954, "learning_rate": 2.9743259303412707e-06, "loss": 1.3138, "step": 25815 }, { "epoch": 0.9245259369348399, "grad_norm": 1.316504716873169, "learning_rate": 2.9715187147988823e-06, "loss": 1.177, "step": 25816 }, { "epoch": 0.9245617490644081, "grad_norm": 1.8215168714523315, "learning_rate": 2.968712804649543e-06, "loss": 1.5501, "step": 25817 }, { "epoch": 0.9245975611939764, "grad_norm": 1.5576127767562866, "learning_rate": 2.9659081999310112e-06, "loss": 1.3463, "step": 25818 }, { "epoch": 0.9246333733235447, "grad_norm": 1.40639328956604, "learning_rate": 2.9631049006810243e-06, "loss": 1.1919, "step": 25819 }, { "epoch": 0.9246691854531129, "grad_norm": 1.4003286361694336, "learning_rate": 2.9603029069372733e-06, "loss": 1.5546, "step": 25820 }, { "epoch": 0.9247049975826812, "grad_norm": 1.6173608303070068, "learning_rate": 2.9575022187374958e-06, "loss": 1.5284, "step": 25821 }, { "epoch": 0.9247408097122496, "grad_norm": 1.644357442855835, "learning_rate": 2.9547028361193495e-06, "loss": 1.3983, "step": 25822 }, { "epoch": 0.9247766218418179, "grad_norm": 1.6090519428253174, "learning_rate": 2.951904759120494e-06, "loss": 1.4281, "step": 25823 }, { "epoch": 0.9248124339713861, "grad_norm": 1.6314555406570435, "learning_rate": 2.9491079877785767e-06, "loss": 1.3319, "step": 25824 }, { "epoch": 0.9248482461009544, "grad_norm": 1.707886815071106, "learning_rate": 2.9463125221312117e-06, "loss": 1.2501, "step": 25825 }, { "epoch": 0.9248840582305227, "grad_norm": 1.4632911682128906, "learning_rate": 2.9435183622160465e-06, "loss": 1.526, "step": 25826 }, { "epoch": 0.9249198703600909, "grad_norm": 1.7945195436477661, "learning_rate": 2.9407255080706297e-06, "loss": 1.3357, "step": 25827 }, { "epoch": 0.9249556824896592, "grad_norm": 1.7915374040603638, "learning_rate": 2.937933959732553e-06, "loss": 1.5678, "step": 25828 }, { "epoch": 0.9249914946192276, "grad_norm": 1.4077473878860474, "learning_rate": 2.9351437172393746e-06, "loss": 1.5492, "step": 25829 }, { "epoch": 0.9250273067487959, "grad_norm": 1.4841781854629517, "learning_rate": 2.9323547806286432e-06, "loss": 1.391, "step": 25830 }, { "epoch": 0.9250631188783641, "grad_norm": 2.442631721496582, "learning_rate": 2.9295671499378506e-06, "loss": 1.6424, "step": 25831 }, { "epoch": 0.9250989310079324, "grad_norm": 1.5331733226776123, "learning_rate": 2.9267808252045338e-06, "loss": 1.4525, "step": 25832 }, { "epoch": 0.9251347431375007, "grad_norm": 2.0141773223876953, "learning_rate": 2.923995806466173e-06, "loss": 1.367, "step": 25833 }, { "epoch": 0.9251705552670689, "grad_norm": 1.460148572921753, "learning_rate": 2.9212120937602174e-06, "loss": 1.5702, "step": 25834 }, { "epoch": 0.9252063673966372, "grad_norm": 1.5255372524261475, "learning_rate": 2.9184296871241357e-06, "loss": 1.3125, "step": 25835 }, { "epoch": 0.9252421795262056, "grad_norm": 1.992004156112671, "learning_rate": 2.9156485865953544e-06, "loss": 1.5373, "step": 25836 }, { "epoch": 0.9252779916557738, "grad_norm": 1.4714744091033936, "learning_rate": 2.9128687922112987e-06, "loss": 1.3717, "step": 25837 }, { "epoch": 0.9253138037853421, "grad_norm": 1.6506719589233398, "learning_rate": 2.91009030400935e-06, "loss": 1.3876, "step": 25838 }, { "epoch": 0.9253496159149104, "grad_norm": 1.959061622619629, "learning_rate": 2.9073131220269e-06, "loss": 1.2011, "step": 25839 }, { "epoch": 0.9253854280444787, "grad_norm": 1.3772485256195068, "learning_rate": 2.9045372463013088e-06, "loss": 1.3579, "step": 25840 }, { "epoch": 0.9254212401740469, "grad_norm": 2.8980002403259277, "learning_rate": 2.9017626768699346e-06, "loss": 1.3286, "step": 25841 }, { "epoch": 0.9254570523036152, "grad_norm": 2.0866899490356445, "learning_rate": 2.8989894137700924e-06, "loss": 1.7091, "step": 25842 }, { "epoch": 0.9254928644331836, "grad_norm": 1.37162446975708, "learning_rate": 2.8962174570390965e-06, "loss": 1.5254, "step": 25843 }, { "epoch": 0.9255286765627518, "grad_norm": 1.817568063735962, "learning_rate": 2.8934468067142396e-06, "loss": 1.1529, "step": 25844 }, { "epoch": 0.9255644886923201, "grad_norm": 1.9801509380340576, "learning_rate": 2.8906774628327917e-06, "loss": 1.5762, "step": 25845 }, { "epoch": 0.9256003008218884, "grad_norm": 1.6975454092025757, "learning_rate": 2.8879094254320225e-06, "loss": 1.2415, "step": 25846 }, { "epoch": 0.9256361129514566, "grad_norm": 2.336732864379883, "learning_rate": 2.8851426945491588e-06, "loss": 1.4693, "step": 25847 }, { "epoch": 0.9256719250810249, "grad_norm": 1.9778311252593994, "learning_rate": 2.882377270221448e-06, "loss": 1.4825, "step": 25848 }, { "epoch": 0.9257077372105932, "grad_norm": 1.873709797859192, "learning_rate": 2.8796131524860603e-06, "loss": 1.408, "step": 25849 }, { "epoch": 0.9257435493401616, "grad_norm": 1.574170470237732, "learning_rate": 2.8768503413802108e-06, "loss": 1.3283, "step": 25850 }, { "epoch": 0.9257793614697298, "grad_norm": 1.7731971740722656, "learning_rate": 2.8740888369410577e-06, "loss": 1.6502, "step": 25851 }, { "epoch": 0.9258151735992981, "grad_norm": 1.3365644216537476, "learning_rate": 2.8713286392057614e-06, "loss": 1.4837, "step": 25852 }, { "epoch": 0.9258509857288664, "grad_norm": 1.4238344430923462, "learning_rate": 2.868569748211436e-06, "loss": 1.3521, "step": 25853 }, { "epoch": 0.9258867978584346, "grad_norm": 1.5015554428100586, "learning_rate": 2.8658121639952297e-06, "loss": 1.5199, "step": 25854 }, { "epoch": 0.9259226099880029, "grad_norm": 1.2220364809036255, "learning_rate": 2.8630558865942237e-06, "loss": 1.33, "step": 25855 }, { "epoch": 0.9259584221175712, "grad_norm": 1.678745985031128, "learning_rate": 2.8603009160454995e-06, "loss": 1.8247, "step": 25856 }, { "epoch": 0.9259942342471396, "grad_norm": 1.7295626401901245, "learning_rate": 2.857547252386117e-06, "loss": 1.4945, "step": 25857 }, { "epoch": 0.9260300463767078, "grad_norm": 1.4009100198745728, "learning_rate": 2.854794895653146e-06, "loss": 1.1137, "step": 25858 }, { "epoch": 0.9260658585062761, "grad_norm": 1.4563788175582886, "learning_rate": 2.8520438458836007e-06, "loss": 1.5361, "step": 25859 }, { "epoch": 0.9261016706358444, "grad_norm": 1.6020429134368896, "learning_rate": 2.849294103114486e-06, "loss": 1.329, "step": 25860 }, { "epoch": 0.9261374827654126, "grad_norm": 1.4788800477981567, "learning_rate": 2.846545667382805e-06, "loss": 1.3567, "step": 25861 }, { "epoch": 0.9261732948949809, "grad_norm": 1.8151326179504395, "learning_rate": 2.8437985387255394e-06, "loss": 1.7524, "step": 25862 }, { "epoch": 0.9262091070245492, "grad_norm": 1.3136143684387207, "learning_rate": 2.8410527171796376e-06, "loss": 1.109, "step": 25863 }, { "epoch": 0.9262449191541176, "grad_norm": 3.2092204093933105, "learning_rate": 2.838308202782036e-06, "loss": 1.8549, "step": 25864 }, { "epoch": 0.9262807312836858, "grad_norm": 1.5936020612716675, "learning_rate": 2.835564995569684e-06, "loss": 1.6723, "step": 25865 }, { "epoch": 0.9263165434132541, "grad_norm": 1.55735445022583, "learning_rate": 2.8328230955794733e-06, "loss": 1.3075, "step": 25866 }, { "epoch": 0.9263523555428224, "grad_norm": 1.6565905809402466, "learning_rate": 2.8300825028482748e-06, "loss": 1.3222, "step": 25867 }, { "epoch": 0.9263881676723906, "grad_norm": 1.7441405057907104, "learning_rate": 2.827343217412981e-06, "loss": 1.2376, "step": 25868 }, { "epoch": 0.9264239798019589, "grad_norm": 1.8766648769378662, "learning_rate": 2.8246052393104516e-06, "loss": 1.4782, "step": 25869 }, { "epoch": 0.9264597919315272, "grad_norm": 1.460390329360962, "learning_rate": 2.8218685685775015e-06, "loss": 1.4786, "step": 25870 }, { "epoch": 0.9264956040610955, "grad_norm": 1.593667984008789, "learning_rate": 2.8191332052509567e-06, "loss": 1.6359, "step": 25871 }, { "epoch": 0.9265314161906638, "grad_norm": 1.744284987449646, "learning_rate": 2.8163991493676212e-06, "loss": 1.2058, "step": 25872 }, { "epoch": 0.9265672283202321, "grad_norm": 1.8081164360046387, "learning_rate": 2.8136664009642877e-06, "loss": 1.678, "step": 25873 }, { "epoch": 0.9266030404498004, "grad_norm": 1.7997514009475708, "learning_rate": 2.8109349600777045e-06, "loss": 1.6136, "step": 25874 }, { "epoch": 0.9266388525793686, "grad_norm": 1.647782802581787, "learning_rate": 2.8082048267446203e-06, "loss": 1.1104, "step": 25875 }, { "epoch": 0.9266746647089369, "grad_norm": 1.5916073322296143, "learning_rate": 2.805476001001772e-06, "loss": 1.5106, "step": 25876 }, { "epoch": 0.9267104768385052, "grad_norm": 1.8126418590545654, "learning_rate": 2.802748482885886e-06, "loss": 1.3683, "step": 25877 }, { "epoch": 0.9267462889680735, "grad_norm": 1.6614654064178467, "learning_rate": 2.800022272433633e-06, "loss": 1.3221, "step": 25878 }, { "epoch": 0.9267821010976418, "grad_norm": 1.9782456159591675, "learning_rate": 2.797297369681706e-06, "loss": 1.3235, "step": 25879 }, { "epoch": 0.9268179132272101, "grad_norm": 1.6730729341506958, "learning_rate": 2.7945737746667643e-06, "loss": 1.6093, "step": 25880 }, { "epoch": 0.9268537253567783, "grad_norm": 1.5324561595916748, "learning_rate": 2.7918514874254454e-06, "loss": 1.3519, "step": 25881 }, { "epoch": 0.9268895374863466, "grad_norm": 1.5539942979812622, "learning_rate": 2.789130507994364e-06, "loss": 1.1705, "step": 25882 }, { "epoch": 0.9269253496159149, "grad_norm": 1.2986265420913696, "learning_rate": 2.786410836410147e-06, "loss": 1.4491, "step": 25883 }, { "epoch": 0.9269611617454832, "grad_norm": 1.458767056465149, "learning_rate": 2.783692472709376e-06, "loss": 1.5184, "step": 25884 }, { "epoch": 0.9269969738750515, "grad_norm": 1.4174643754959106, "learning_rate": 2.7809754169286216e-06, "loss": 1.3685, "step": 25885 }, { "epoch": 0.9270327860046198, "grad_norm": 1.9586244821548462, "learning_rate": 2.7782596691044327e-06, "loss": 1.3631, "step": 25886 }, { "epoch": 0.9270685981341881, "grad_norm": 1.8806778192520142, "learning_rate": 2.7755452292733684e-06, "loss": 1.5604, "step": 25887 }, { "epoch": 0.9271044102637563, "grad_norm": 1.5468672513961792, "learning_rate": 2.7728320974719225e-06, "loss": 1.0677, "step": 25888 }, { "epoch": 0.9271402223933246, "grad_norm": 1.300863265991211, "learning_rate": 2.7701202737366096e-06, "loss": 1.379, "step": 25889 }, { "epoch": 0.9271760345228929, "grad_norm": 1.3959417343139648, "learning_rate": 2.7674097581039004e-06, "loss": 1.4749, "step": 25890 }, { "epoch": 0.9272118466524611, "grad_norm": 1.5664010047912598, "learning_rate": 2.7647005506102886e-06, "loss": 1.2282, "step": 25891 }, { "epoch": 0.9272476587820295, "grad_norm": 2.319263458251953, "learning_rate": 2.7619926512921888e-06, "loss": 1.4032, "step": 25892 }, { "epoch": 0.9272834709115978, "grad_norm": 1.7327048778533936, "learning_rate": 2.7592860601860616e-06, "loss": 1.7905, "step": 25893 }, { "epoch": 0.9273192830411661, "grad_norm": 1.7181236743927002, "learning_rate": 2.7565807773282994e-06, "loss": 1.3992, "step": 25894 }, { "epoch": 0.9273550951707343, "grad_norm": 1.821083426475525, "learning_rate": 2.7538768027553174e-06, "loss": 1.6069, "step": 25895 }, { "epoch": 0.9273909073003026, "grad_norm": 1.5232264995574951, "learning_rate": 2.751174136503498e-06, "loss": 1.3917, "step": 25896 }, { "epoch": 0.9274267194298709, "grad_norm": 1.4377104043960571, "learning_rate": 2.748472778609157e-06, "loss": 1.4063, "step": 25897 }, { "epoch": 0.9274625315594391, "grad_norm": 1.4285274744033813, "learning_rate": 2.7457727291086867e-06, "loss": 1.4704, "step": 25898 }, { "epoch": 0.9274983436890075, "grad_norm": 1.573488473892212, "learning_rate": 2.7430739880383915e-06, "loss": 1.3264, "step": 25899 }, { "epoch": 0.9275341558185758, "grad_norm": 1.4215755462646484, "learning_rate": 2.7403765554345984e-06, "loss": 1.3079, "step": 25900 }, { "epoch": 0.9275699679481441, "grad_norm": 1.3711589574813843, "learning_rate": 2.737680431333556e-06, "loss": 1.3841, "step": 25901 }, { "epoch": 0.9276057800777123, "grad_norm": 1.6720585823059082, "learning_rate": 2.7349856157715793e-06, "loss": 1.4348, "step": 25902 }, { "epoch": 0.9276415922072806, "grad_norm": 1.738786220550537, "learning_rate": 2.7322921087849063e-06, "loss": 1.5032, "step": 25903 }, { "epoch": 0.9276774043368489, "grad_norm": 2.6275978088378906, "learning_rate": 2.7295999104097746e-06, "loss": 1.3313, "step": 25904 }, { "epoch": 0.9277132164664171, "grad_norm": 1.735266923904419, "learning_rate": 2.726909020682422e-06, "loss": 1.2613, "step": 25905 }, { "epoch": 0.9277490285959855, "grad_norm": 1.6922903060913086, "learning_rate": 2.72421943963902e-06, "loss": 1.3072, "step": 25906 }, { "epoch": 0.9277848407255538, "grad_norm": 1.5648711919784546, "learning_rate": 2.7215311673157715e-06, "loss": 1.5856, "step": 25907 }, { "epoch": 0.927820652855122, "grad_norm": 1.460918664932251, "learning_rate": 2.718844203748827e-06, "loss": 1.3441, "step": 25908 }, { "epoch": 0.9278564649846903, "grad_norm": 1.5979174375534058, "learning_rate": 2.716158548974379e-06, "loss": 1.6612, "step": 25909 }, { "epoch": 0.9278922771142586, "grad_norm": 1.7928675413131714, "learning_rate": 2.71347420302851e-06, "loss": 1.5751, "step": 25910 }, { "epoch": 0.9279280892438269, "grad_norm": 1.5402860641479492, "learning_rate": 2.7107911659473682e-06, "loss": 1.2454, "step": 25911 }, { "epoch": 0.9279639013733951, "grad_norm": 1.4214240312576294, "learning_rate": 2.708109437767015e-06, "loss": 1.1149, "step": 25912 }, { "epoch": 0.9279997135029635, "grad_norm": 1.4117375612258911, "learning_rate": 2.705429018523575e-06, "loss": 1.102, "step": 25913 }, { "epoch": 0.9280355256325318, "grad_norm": 1.7786318063735962, "learning_rate": 2.702749908253077e-06, "loss": 1.2132, "step": 25914 }, { "epoch": 0.9280713377621, "grad_norm": 1.6047890186309814, "learning_rate": 2.70007210699158e-06, "loss": 1.266, "step": 25915 }, { "epoch": 0.9281071498916683, "grad_norm": 2.034290075302124, "learning_rate": 2.697395614775089e-06, "loss": 1.2727, "step": 25916 }, { "epoch": 0.9281429620212366, "grad_norm": 1.5755672454833984, "learning_rate": 2.694720431639641e-06, "loss": 1.0504, "step": 25917 }, { "epoch": 0.9281787741508049, "grad_norm": 1.8133633136749268, "learning_rate": 2.6920465576212195e-06, "loss": 1.6142, "step": 25918 }, { "epoch": 0.9282145862803731, "grad_norm": 2.0033183097839355, "learning_rate": 2.6893739927557725e-06, "loss": 1.4867, "step": 25919 }, { "epoch": 0.9282503984099415, "grad_norm": 1.8075355291366577, "learning_rate": 2.6867027370793053e-06, "loss": 1.2702, "step": 25920 }, { "epoch": 0.9282862105395098, "grad_norm": 1.8477706909179688, "learning_rate": 2.684032790627722e-06, "loss": 1.1577, "step": 25921 }, { "epoch": 0.928322022669078, "grad_norm": 1.8060771226882935, "learning_rate": 2.6813641534369383e-06, "loss": 1.4247, "step": 25922 }, { "epoch": 0.9283578347986463, "grad_norm": 1.3779622316360474, "learning_rate": 2.678696825542859e-06, "loss": 1.5116, "step": 25923 }, { "epoch": 0.9283936469282146, "grad_norm": 1.5192153453826904, "learning_rate": 2.676030806981389e-06, "loss": 1.442, "step": 25924 }, { "epoch": 0.9284294590577828, "grad_norm": 1.7140536308288574, "learning_rate": 2.673366097788399e-06, "loss": 1.4663, "step": 25925 }, { "epoch": 0.9284652711873511, "grad_norm": 1.3808923959732056, "learning_rate": 2.670702697999705e-06, "loss": 1.4116, "step": 25926 }, { "epoch": 0.9285010833169195, "grad_norm": 1.517737865447998, "learning_rate": 2.6680406076511677e-06, "loss": 1.3334, "step": 25927 }, { "epoch": 0.9285368954464878, "grad_norm": 1.7509211301803589, "learning_rate": 2.6653798267785912e-06, "loss": 1.2938, "step": 25928 }, { "epoch": 0.928572707576056, "grad_norm": 1.7517247200012207, "learning_rate": 2.6627203554177916e-06, "loss": 1.1616, "step": 25929 }, { "epoch": 0.9286085197056243, "grad_norm": 1.3450393676757812, "learning_rate": 2.660062193604518e-06, "loss": 1.4951, "step": 25930 }, { "epoch": 0.9286443318351926, "grad_norm": 1.2545347213745117, "learning_rate": 2.6574053413745524e-06, "loss": 1.2913, "step": 25931 }, { "epoch": 0.9286801439647608, "grad_norm": 1.784175992012024, "learning_rate": 2.654749798763645e-06, "loss": 1.8439, "step": 25932 }, { "epoch": 0.9287159560943291, "grad_norm": 1.5474541187286377, "learning_rate": 2.6520955658074997e-06, "loss": 1.6509, "step": 25933 }, { "epoch": 0.9287517682238975, "grad_norm": 1.63936448097229, "learning_rate": 2.649442642541833e-06, "loss": 1.5111, "step": 25934 }, { "epoch": 0.9287875803534658, "grad_norm": 1.4332921504974365, "learning_rate": 2.646791029002349e-06, "loss": 1.5307, "step": 25935 }, { "epoch": 0.928823392483034, "grad_norm": 1.8409286737442017, "learning_rate": 2.6441407252247306e-06, "loss": 1.5314, "step": 25936 }, { "epoch": 0.9288592046126023, "grad_norm": 1.276275396347046, "learning_rate": 2.641491731244605e-06, "loss": 1.4106, "step": 25937 }, { "epoch": 0.9288950167421706, "grad_norm": 1.8311176300048828, "learning_rate": 2.6388440470976217e-06, "loss": 1.326, "step": 25938 }, { "epoch": 0.9289308288717388, "grad_norm": 2.0258560180664062, "learning_rate": 2.6361976728194183e-06, "loss": 1.3223, "step": 25939 }, { "epoch": 0.9289666410013071, "grad_norm": 1.7356675863265991, "learning_rate": 2.6335526084455665e-06, "loss": 1.4339, "step": 25940 }, { "epoch": 0.9290024531308755, "grad_norm": 1.5074106454849243, "learning_rate": 2.630908854011682e-06, "loss": 1.4021, "step": 25941 }, { "epoch": 0.9290382652604438, "grad_norm": 1.8826628923416138, "learning_rate": 2.628266409553315e-06, "loss": 1.5917, "step": 25942 }, { "epoch": 0.929074077390012, "grad_norm": 1.2302558422088623, "learning_rate": 2.625625275106036e-06, "loss": 1.2319, "step": 25943 }, { "epoch": 0.9291098895195803, "grad_norm": 1.6267286539077759, "learning_rate": 2.6229854507053507e-06, "loss": 1.6428, "step": 25944 }, { "epoch": 0.9291457016491486, "grad_norm": 1.582241415977478, "learning_rate": 2.6203469363867973e-06, "loss": 1.4682, "step": 25945 }, { "epoch": 0.9291815137787168, "grad_norm": 1.7281001806259155, "learning_rate": 2.6177097321858578e-06, "loss": 1.4495, "step": 25946 }, { "epoch": 0.9292173259082851, "grad_norm": 2.26235294342041, "learning_rate": 2.615073838138027e-06, "loss": 1.5354, "step": 25947 }, { "epoch": 0.9292531380378534, "grad_norm": 1.591934084892273, "learning_rate": 2.6124392542787645e-06, "loss": 1.4247, "step": 25948 }, { "epoch": 0.9292889501674217, "grad_norm": 2.2034246921539307, "learning_rate": 2.609805980643498e-06, "loss": 1.4845, "step": 25949 }, { "epoch": 0.92932476229699, "grad_norm": 2.1909966468811035, "learning_rate": 2.607174017267677e-06, "loss": 1.3116, "step": 25950 }, { "epoch": 0.9293605744265583, "grad_norm": 1.33592689037323, "learning_rate": 2.6045433641866958e-06, "loss": 1.4352, "step": 25951 }, { "epoch": 0.9293963865561266, "grad_norm": 1.6754401922225952, "learning_rate": 2.6019140214359585e-06, "loss": 1.4147, "step": 25952 }, { "epoch": 0.9294321986856948, "grad_norm": 2.1295597553253174, "learning_rate": 2.599285989050826e-06, "loss": 1.6047, "step": 25953 }, { "epoch": 0.9294680108152631, "grad_norm": 2.042235851287842, "learning_rate": 2.59665926706667e-06, "loss": 1.5889, "step": 25954 }, { "epoch": 0.9295038229448314, "grad_norm": 1.2845739126205444, "learning_rate": 2.594033855518818e-06, "loss": 1.4005, "step": 25955 }, { "epoch": 0.9295396350743997, "grad_norm": 1.5340960025787354, "learning_rate": 2.5914097544425975e-06, "loss": 1.5359, "step": 25956 }, { "epoch": 0.929575447203968, "grad_norm": 1.4688477516174316, "learning_rate": 2.588786963873313e-06, "loss": 1.603, "step": 25957 }, { "epoch": 0.9296112593335363, "grad_norm": 1.4088783264160156, "learning_rate": 2.586165483846248e-06, "loss": 1.496, "step": 25958 }, { "epoch": 0.9296470714631045, "grad_norm": 1.6192882061004639, "learning_rate": 2.5835453143966627e-06, "loss": 1.5361, "step": 25959 }, { "epoch": 0.9296828835926728, "grad_norm": 1.3497220277786255, "learning_rate": 2.580926455559829e-06, "loss": 1.3451, "step": 25960 }, { "epoch": 0.9297186957222411, "grad_norm": 1.6116409301757812, "learning_rate": 2.5783089073709633e-06, "loss": 1.3246, "step": 25961 }, { "epoch": 0.9297545078518094, "grad_norm": 1.3760795593261719, "learning_rate": 2.5756926698652816e-06, "loss": 1.3463, "step": 25962 }, { "epoch": 0.9297903199813777, "grad_norm": 1.4690639972686768, "learning_rate": 2.5730777430779895e-06, "loss": 1.2777, "step": 25963 }, { "epoch": 0.929826132110946, "grad_norm": 1.1338255405426025, "learning_rate": 2.57046412704427e-06, "loss": 1.4689, "step": 25964 }, { "epoch": 0.9298619442405143, "grad_norm": 2.178570032119751, "learning_rate": 2.567851821799283e-06, "loss": 1.7218, "step": 25965 }, { "epoch": 0.9298977563700825, "grad_norm": 1.6850600242614746, "learning_rate": 2.565240827378157e-06, "loss": 1.2855, "step": 25966 }, { "epoch": 0.9299335684996508, "grad_norm": 1.398482322692871, "learning_rate": 2.562631143816041e-06, "loss": 1.212, "step": 25967 }, { "epoch": 0.9299693806292191, "grad_norm": 1.6965020895004272, "learning_rate": 2.560022771148052e-06, "loss": 1.4466, "step": 25968 }, { "epoch": 0.9300051927587873, "grad_norm": 2.532001256942749, "learning_rate": 2.55741570940925e-06, "loss": 1.3208, "step": 25969 }, { "epoch": 0.9300410048883557, "grad_norm": 1.4785244464874268, "learning_rate": 2.5548099586347296e-06, "loss": 1.2717, "step": 25970 }, { "epoch": 0.930076817017924, "grad_norm": 1.5374987125396729, "learning_rate": 2.552205518859552e-06, "loss": 1.4995, "step": 25971 }, { "epoch": 0.9301126291474923, "grad_norm": 1.8516207933425903, "learning_rate": 2.549602390118755e-06, "loss": 1.3219, "step": 25972 }, { "epoch": 0.9301484412770605, "grad_norm": 1.6778855323791504, "learning_rate": 2.5470005724473447e-06, "loss": 1.4766, "step": 25973 }, { "epoch": 0.9301842534066288, "grad_norm": 1.8335471153259277, "learning_rate": 2.544400065880337e-06, "loss": 1.6164, "step": 25974 }, { "epoch": 0.9302200655361971, "grad_norm": 1.762180209159851, "learning_rate": 2.5418008704527263e-06, "loss": 1.3349, "step": 25975 }, { "epoch": 0.9302558776657653, "grad_norm": 1.5323717594146729, "learning_rate": 2.5392029861994625e-06, "loss": 1.361, "step": 25976 }, { "epoch": 0.9302916897953337, "grad_norm": 1.7938995361328125, "learning_rate": 2.5366064131555066e-06, "loss": 1.4333, "step": 25977 }, { "epoch": 0.930327501924902, "grad_norm": 3.3588671684265137, "learning_rate": 2.534011151355797e-06, "loss": 1.7233, "step": 25978 }, { "epoch": 0.9303633140544703, "grad_norm": 1.9261466264724731, "learning_rate": 2.531417200835251e-06, "loss": 1.1339, "step": 25979 }, { "epoch": 0.9303991261840385, "grad_norm": 2.6531336307525635, "learning_rate": 2.528824561628762e-06, "loss": 1.3105, "step": 25980 }, { "epoch": 0.9304349383136068, "grad_norm": 1.9853758811950684, "learning_rate": 2.5262332337712025e-06, "loss": 1.5984, "step": 25981 }, { "epoch": 0.9304707504431751, "grad_norm": 2.0931293964385986, "learning_rate": 2.5236432172974333e-06, "loss": 1.4613, "step": 25982 }, { "epoch": 0.9305065625727433, "grad_norm": 1.676774263381958, "learning_rate": 2.521054512242338e-06, "loss": 1.3516, "step": 25983 }, { "epoch": 0.9305423747023117, "grad_norm": 2.231872797012329, "learning_rate": 2.5184671186406996e-06, "loss": 1.4991, "step": 25984 }, { "epoch": 0.93057818683188, "grad_norm": 1.5376390218734741, "learning_rate": 2.5158810365273345e-06, "loss": 1.341, "step": 25985 }, { "epoch": 0.9306139989614483, "grad_norm": 1.4266811609268188, "learning_rate": 2.5132962659370595e-06, "loss": 1.5016, "step": 25986 }, { "epoch": 0.9306498110910165, "grad_norm": 1.5460529327392578, "learning_rate": 2.510712806904625e-06, "loss": 1.1129, "step": 25987 }, { "epoch": 0.9306856232205848, "grad_norm": 1.3211203813552856, "learning_rate": 2.5081306594647912e-06, "loss": 1.3368, "step": 25988 }, { "epoch": 0.9307214353501531, "grad_norm": 1.648240566253662, "learning_rate": 2.505549823652309e-06, "loss": 1.2672, "step": 25989 }, { "epoch": 0.9307572474797213, "grad_norm": 1.4202256202697754, "learning_rate": 2.5029702995019055e-06, "loss": 1.5001, "step": 25990 }, { "epoch": 0.9307930596092897, "grad_norm": 1.5760564804077148, "learning_rate": 2.5003920870482644e-06, "loss": 1.4344, "step": 25991 }, { "epoch": 0.930828871738858, "grad_norm": 1.6077864170074463, "learning_rate": 2.4978151863260914e-06, "loss": 1.4363, "step": 25992 }, { "epoch": 0.9308646838684262, "grad_norm": 1.4821381568908691, "learning_rate": 2.495239597370047e-06, "loss": 1.5983, "step": 25993 }, { "epoch": 0.9309004959979945, "grad_norm": 1.120290994644165, "learning_rate": 2.492665320214771e-06, "loss": 1.4057, "step": 25994 }, { "epoch": 0.9309363081275628, "grad_norm": 1.2401329278945923, "learning_rate": 2.490092354894913e-06, "loss": 1.1569, "step": 25995 }, { "epoch": 0.930972120257131, "grad_norm": 1.77287757396698, "learning_rate": 2.4875207014450785e-06, "loss": 1.5907, "step": 25996 }, { "epoch": 0.9310079323866993, "grad_norm": 1.4243831634521484, "learning_rate": 2.4849503598998738e-06, "loss": 1.4151, "step": 25997 }, { "epoch": 0.9310437445162677, "grad_norm": 1.5774353742599487, "learning_rate": 2.4823813302938814e-06, "loss": 1.3764, "step": 25998 }, { "epoch": 0.931079556645836, "grad_norm": 1.5646772384643555, "learning_rate": 2.4798136126616634e-06, "loss": 1.5245, "step": 25999 }, { "epoch": 0.9311153687754042, "grad_norm": 1.583769679069519, "learning_rate": 2.477247207037736e-06, "loss": 1.7049, "step": 26000 }, { "epoch": 0.9311511809049725, "grad_norm": 1.5972886085510254, "learning_rate": 2.4746821134566833e-06, "loss": 1.2635, "step": 26001 }, { "epoch": 0.9311869930345408, "grad_norm": 1.2284495830535889, "learning_rate": 2.4721183319529774e-06, "loss": 1.3457, "step": 26002 }, { "epoch": 0.931222805164109, "grad_norm": 1.774373173713684, "learning_rate": 2.4695558625611015e-06, "loss": 1.2893, "step": 26003 }, { "epoch": 0.9312586172936773, "grad_norm": 2.132659435272217, "learning_rate": 2.4669947053155617e-06, "loss": 1.2126, "step": 26004 }, { "epoch": 0.9312944294232457, "grad_norm": 1.3666845560073853, "learning_rate": 2.464434860250786e-06, "loss": 1.5706, "step": 26005 }, { "epoch": 0.931330241552814, "grad_norm": 1.9008538722991943, "learning_rate": 2.461876327401247e-06, "loss": 1.3145, "step": 26006 }, { "epoch": 0.9313660536823822, "grad_norm": 1.4239145517349243, "learning_rate": 2.4593191068013164e-06, "loss": 1.3138, "step": 26007 }, { "epoch": 0.9314018658119505, "grad_norm": 1.7623815536499023, "learning_rate": 2.4567631984854566e-06, "loss": 1.5899, "step": 26008 }, { "epoch": 0.9314376779415188, "grad_norm": 1.5793737173080444, "learning_rate": 2.4542086024880174e-06, "loss": 1.5624, "step": 26009 }, { "epoch": 0.931473490071087, "grad_norm": 1.4286556243896484, "learning_rate": 2.4516553188433823e-06, "loss": 0.9605, "step": 26010 }, { "epoch": 0.9315093022006553, "grad_norm": 1.671534538269043, "learning_rate": 2.4491033475858795e-06, "loss": 1.5661, "step": 26011 }, { "epoch": 0.9315451143302237, "grad_norm": 2.992291212081909, "learning_rate": 2.44655268874987e-06, "loss": 1.1977, "step": 26012 }, { "epoch": 0.931580926459792, "grad_norm": 1.70440673828125, "learning_rate": 2.4440033423696717e-06, "loss": 1.4343, "step": 26013 }, { "epoch": 0.9316167385893602, "grad_norm": 2.031369924545288, "learning_rate": 2.4414553084795455e-06, "loss": 1.4702, "step": 26014 }, { "epoch": 0.9316525507189285, "grad_norm": 1.6914634704589844, "learning_rate": 2.4389085871138086e-06, "loss": 1.4862, "step": 26015 }, { "epoch": 0.9316883628484968, "grad_norm": 2.1513466835021973, "learning_rate": 2.4363631783067108e-06, "loss": 1.523, "step": 26016 }, { "epoch": 0.931724174978065, "grad_norm": 1.6219446659088135, "learning_rate": 2.4338190820925145e-06, "loss": 1.4038, "step": 26017 }, { "epoch": 0.9317599871076333, "grad_norm": 1.5040446519851685, "learning_rate": 2.4312762985054137e-06, "loss": 1.3775, "step": 26018 }, { "epoch": 0.9317957992372017, "grad_norm": 3.2530477046966553, "learning_rate": 2.4287348275796373e-06, "loss": 1.0825, "step": 26019 }, { "epoch": 0.93183161136677, "grad_norm": 1.5604746341705322, "learning_rate": 2.4261946693493797e-06, "loss": 1.0338, "step": 26020 }, { "epoch": 0.9318674234963382, "grad_norm": 2.0306684970855713, "learning_rate": 2.4236558238488025e-06, "loss": 1.4578, "step": 26021 }, { "epoch": 0.9319032356259065, "grad_norm": 2.1134817600250244, "learning_rate": 2.421118291112079e-06, "loss": 1.5503, "step": 26022 }, { "epoch": 0.9319390477554748, "grad_norm": 1.4790269136428833, "learning_rate": 2.4185820711733363e-06, "loss": 1.5024, "step": 26023 }, { "epoch": 0.931974859885043, "grad_norm": 1.7622898817062378, "learning_rate": 2.4160471640667147e-06, "loss": 1.5454, "step": 26024 }, { "epoch": 0.9320106720146113, "grad_norm": 1.9339481592178345, "learning_rate": 2.413513569826298e-06, "loss": 1.3688, "step": 26025 }, { "epoch": 0.9320464841441797, "grad_norm": 1.6316418647766113, "learning_rate": 2.410981288486169e-06, "loss": 1.4478, "step": 26026 }, { "epoch": 0.932082296273748, "grad_norm": 1.73930823802948, "learning_rate": 2.408450320080413e-06, "loss": 1.5425, "step": 26027 }, { "epoch": 0.9321181084033162, "grad_norm": 1.9473265409469604, "learning_rate": 2.40592066464308e-06, "loss": 1.5209, "step": 26028 }, { "epoch": 0.9321539205328845, "grad_norm": 1.8710750341415405, "learning_rate": 2.4033923222081868e-06, "loss": 1.3462, "step": 26029 }, { "epoch": 0.9321897326624528, "grad_norm": 1.5108715295791626, "learning_rate": 2.400865292809762e-06, "loss": 1.4425, "step": 26030 }, { "epoch": 0.932225544792021, "grad_norm": 1.6120551824569702, "learning_rate": 2.3983395764818008e-06, "loss": 1.2536, "step": 26031 }, { "epoch": 0.9322613569215893, "grad_norm": 1.3649541139602661, "learning_rate": 2.395815173258287e-06, "loss": 1.34, "step": 26032 }, { "epoch": 0.9322971690511577, "grad_norm": 1.4536439180374146, "learning_rate": 2.393292083173171e-06, "loss": 1.5061, "step": 26033 }, { "epoch": 0.9323329811807259, "grad_norm": 1.6911566257476807, "learning_rate": 2.390770306260415e-06, "loss": 1.3117, "step": 26034 }, { "epoch": 0.9323687933102942, "grad_norm": 1.714871883392334, "learning_rate": 2.388249842553936e-06, "loss": 1.5929, "step": 26035 }, { "epoch": 0.9324046054398625, "grad_norm": 1.709179162979126, "learning_rate": 2.385730692087651e-06, "loss": 1.5818, "step": 26036 }, { "epoch": 0.9324404175694307, "grad_norm": 1.9201855659484863, "learning_rate": 2.3832128548954334e-06, "loss": 1.3567, "step": 26037 }, { "epoch": 0.932476229698999, "grad_norm": 1.8978859186172485, "learning_rate": 2.3806963310111786e-06, "loss": 1.6552, "step": 26038 }, { "epoch": 0.9325120418285673, "grad_norm": 1.9330658912658691, "learning_rate": 2.3781811204687367e-06, "loss": 1.3294, "step": 26039 }, { "epoch": 0.9325478539581357, "grad_norm": 1.352783441543579, "learning_rate": 2.375667223301936e-06, "loss": 1.5671, "step": 26040 }, { "epoch": 0.9325836660877039, "grad_norm": 1.6923365592956543, "learning_rate": 2.3731546395446056e-06, "loss": 1.4242, "step": 26041 }, { "epoch": 0.9326194782172722, "grad_norm": 1.456566333770752, "learning_rate": 2.370643369230563e-06, "loss": 1.8199, "step": 26042 }, { "epoch": 0.9326552903468405, "grad_norm": 1.8373398780822754, "learning_rate": 2.3681334123935805e-06, "loss": 1.5232, "step": 26043 }, { "epoch": 0.9326911024764087, "grad_norm": 1.4119781255722046, "learning_rate": 2.3656247690674092e-06, "loss": 1.5827, "step": 26044 }, { "epoch": 0.932726914605977, "grad_norm": 1.4931371212005615, "learning_rate": 2.3631174392858335e-06, "loss": 1.5398, "step": 26045 }, { "epoch": 0.9327627267355453, "grad_norm": 2.504749298095703, "learning_rate": 2.3606114230825704e-06, "loss": 1.3982, "step": 26046 }, { "epoch": 0.9327985388651137, "grad_norm": 1.844650149345398, "learning_rate": 2.3581067204913267e-06, "loss": 1.3822, "step": 26047 }, { "epoch": 0.9328343509946819, "grad_norm": 1.961634635925293, "learning_rate": 2.355603331545808e-06, "loss": 1.5726, "step": 26048 }, { "epoch": 0.9328701631242502, "grad_norm": 1.4602668285369873, "learning_rate": 2.3531012562796995e-06, "loss": 1.3054, "step": 26049 }, { "epoch": 0.9329059752538185, "grad_norm": 1.6050527095794678, "learning_rate": 2.3506004947266512e-06, "loss": 1.5448, "step": 26050 }, { "epoch": 0.9329417873833867, "grad_norm": 1.3830983638763428, "learning_rate": 2.3481010469203256e-06, "loss": 1.141, "step": 26051 }, { "epoch": 0.932977599512955, "grad_norm": 1.515015721321106, "learning_rate": 2.345602912894329e-06, "loss": 1.4587, "step": 26052 }, { "epoch": 0.9330134116425233, "grad_norm": 2.136948347091675, "learning_rate": 2.3431060926822903e-06, "loss": 1.4007, "step": 26053 }, { "epoch": 0.9330492237720917, "grad_norm": 1.7469984292984009, "learning_rate": 2.340610586317782e-06, "loss": 1.6419, "step": 26054 }, { "epoch": 0.9330850359016599, "grad_norm": 1.5936490297317505, "learning_rate": 2.3381163938343776e-06, "loss": 1.5191, "step": 26055 }, { "epoch": 0.9331208480312282, "grad_norm": 1.4409581422805786, "learning_rate": 2.3356235152656613e-06, "loss": 1.4645, "step": 26056 }, { "epoch": 0.9331566601607965, "grad_norm": 1.6994661092758179, "learning_rate": 2.33313195064514e-06, "loss": 1.4291, "step": 26057 }, { "epoch": 0.9331924722903647, "grad_norm": 2.277144193649292, "learning_rate": 2.330641700006353e-06, "loss": 1.7529, "step": 26058 }, { "epoch": 0.933228284419933, "grad_norm": 1.867384910583496, "learning_rate": 2.328152763382796e-06, "loss": 1.3866, "step": 26059 }, { "epoch": 0.9332640965495013, "grad_norm": 1.7690753936767578, "learning_rate": 2.325665140807964e-06, "loss": 1.6213, "step": 26060 }, { "epoch": 0.9332999086790696, "grad_norm": 1.4293922185897827, "learning_rate": 2.323178832315298e-06, "loss": 1.381, "step": 26061 }, { "epoch": 0.9333357208086379, "grad_norm": 1.8195722103118896, "learning_rate": 2.3206938379382813e-06, "loss": 1.5304, "step": 26062 }, { "epoch": 0.9333715329382062, "grad_norm": 1.5737478733062744, "learning_rate": 2.318210157710332e-06, "loss": 1.3585, "step": 26063 }, { "epoch": 0.9334073450677745, "grad_norm": 1.6605415344238281, "learning_rate": 2.3157277916648567e-06, "loss": 1.1337, "step": 26064 }, { "epoch": 0.9334431571973427, "grad_norm": 1.6333032846450806, "learning_rate": 2.313246739835262e-06, "loss": 1.4887, "step": 26065 }, { "epoch": 0.933478969326911, "grad_norm": 1.589296579360962, "learning_rate": 2.3107670022549323e-06, "loss": 1.6372, "step": 26066 }, { "epoch": 0.9335147814564793, "grad_norm": 1.6948356628417969, "learning_rate": 2.3082885789572182e-06, "loss": 1.316, "step": 26067 }, { "epoch": 0.9335505935860476, "grad_norm": 1.5151407718658447, "learning_rate": 2.305811469975472e-06, "loss": 1.4232, "step": 26068 }, { "epoch": 0.9335864057156159, "grad_norm": 1.8046801090240479, "learning_rate": 2.30333567534301e-06, "loss": 1.536, "step": 26069 }, { "epoch": 0.9336222178451842, "grad_norm": 1.2985812425613403, "learning_rate": 2.3008611950931404e-06, "loss": 1.5431, "step": 26070 }, { "epoch": 0.9336580299747524, "grad_norm": 1.8768430948257446, "learning_rate": 2.2983880292591798e-06, "loss": 1.6994, "step": 26071 }, { "epoch": 0.9336938421043207, "grad_norm": 2.3779830932617188, "learning_rate": 2.295916177874369e-06, "loss": 1.2774, "step": 26072 }, { "epoch": 0.933729654233889, "grad_norm": 2.449038028717041, "learning_rate": 2.2934456409719698e-06, "loss": 1.2834, "step": 26073 }, { "epoch": 0.9337654663634573, "grad_norm": 1.443924069404602, "learning_rate": 2.2909764185852447e-06, "loss": 1.166, "step": 26074 }, { "epoch": 0.9338012784930256, "grad_norm": 1.5654816627502441, "learning_rate": 2.288508510747389e-06, "loss": 1.4546, "step": 26075 }, { "epoch": 0.9338370906225939, "grad_norm": 1.7187449932098389, "learning_rate": 2.2860419174916104e-06, "loss": 1.4493, "step": 26076 }, { "epoch": 0.9338729027521622, "grad_norm": 1.4201176166534424, "learning_rate": 2.2835766388510926e-06, "loss": 1.2746, "step": 26077 }, { "epoch": 0.9339087148817304, "grad_norm": 1.4719351530075073, "learning_rate": 2.2811126748590207e-06, "loss": 1.6015, "step": 26078 }, { "epoch": 0.9339445270112987, "grad_norm": 1.7975047826766968, "learning_rate": 2.278650025548512e-06, "loss": 1.4907, "step": 26079 }, { "epoch": 0.933980339140867, "grad_norm": 2.0674078464508057, "learning_rate": 2.2761886909527187e-06, "loss": 1.4675, "step": 26080 }, { "epoch": 0.9340161512704352, "grad_norm": 1.5718308687210083, "learning_rate": 2.273728671104769e-06, "loss": 1.4185, "step": 26081 }, { "epoch": 0.9340519634000036, "grad_norm": 1.3587315082550049, "learning_rate": 2.271269966037726e-06, "loss": 1.3692, "step": 26082 }, { "epoch": 0.9340877755295719, "grad_norm": 2.1399447917938232, "learning_rate": 2.2688125757846957e-06, "loss": 1.3626, "step": 26083 }, { "epoch": 0.9341235876591402, "grad_norm": 2.119439125061035, "learning_rate": 2.2663565003787078e-06, "loss": 1.5303, "step": 26084 }, { "epoch": 0.9341593997887084, "grad_norm": 1.2838094234466553, "learning_rate": 2.263901739852847e-06, "loss": 1.4867, "step": 26085 }, { "epoch": 0.9341952119182767, "grad_norm": 1.8111953735351562, "learning_rate": 2.2614482942400984e-06, "loss": 1.2078, "step": 26086 }, { "epoch": 0.934231024047845, "grad_norm": 1.9275093078613281, "learning_rate": 2.2589961635735015e-06, "loss": 1.512, "step": 26087 }, { "epoch": 0.9342668361774132, "grad_norm": 1.3588461875915527, "learning_rate": 2.2565453478860297e-06, "loss": 1.1154, "step": 26088 }, { "epoch": 0.9343026483069816, "grad_norm": 1.3044812679290771, "learning_rate": 2.254095847210669e-06, "loss": 1.4511, "step": 26089 }, { "epoch": 0.9343384604365499, "grad_norm": 1.3384042978286743, "learning_rate": 2.2516476615803694e-06, "loss": 1.2449, "step": 26090 }, { "epoch": 0.9343742725661182, "grad_norm": 1.543078899383545, "learning_rate": 2.249200791028039e-06, "loss": 1.4121, "step": 26091 }, { "epoch": 0.9344100846956864, "grad_norm": 1.5723681449890137, "learning_rate": 2.2467552355866505e-06, "loss": 1.5712, "step": 26092 }, { "epoch": 0.9344458968252547, "grad_norm": 1.4810062646865845, "learning_rate": 2.2443109952890674e-06, "loss": 1.3736, "step": 26093 }, { "epoch": 0.934481708954823, "grad_norm": 1.5201414823532104, "learning_rate": 2.241868070168185e-06, "loss": 1.3559, "step": 26094 }, { "epoch": 0.9345175210843912, "grad_norm": 1.811244010925293, "learning_rate": 2.239426460256855e-06, "loss": 1.2936, "step": 26095 }, { "epoch": 0.9345533332139596, "grad_norm": 1.8156101703643799, "learning_rate": 2.236986165587951e-06, "loss": 1.601, "step": 26096 }, { "epoch": 0.9345891453435279, "grad_norm": 1.2325727939605713, "learning_rate": 2.2345471861942914e-06, "loss": 1.2936, "step": 26097 }, { "epoch": 0.9346249574730962, "grad_norm": 1.9929440021514893, "learning_rate": 2.232109522108694e-06, "loss": 1.3793, "step": 26098 }, { "epoch": 0.9346607696026644, "grad_norm": 1.516374111175537, "learning_rate": 2.2296731733639552e-06, "loss": 1.2078, "step": 26099 }, { "epoch": 0.9346965817322327, "grad_norm": 3.2850093841552734, "learning_rate": 2.227238139992849e-06, "loss": 1.2199, "step": 26100 }, { "epoch": 0.934732393861801, "grad_norm": 1.9196382761001587, "learning_rate": 2.224804422028137e-06, "loss": 1.4492, "step": 26101 }, { "epoch": 0.9347682059913692, "grad_norm": 1.7088524103164673, "learning_rate": 2.2223720195025386e-06, "loss": 1.8744, "step": 26102 }, { "epoch": 0.9348040181209376, "grad_norm": 1.5560927391052246, "learning_rate": 2.2199409324488275e-06, "loss": 1.2936, "step": 26103 }, { "epoch": 0.9348398302505059, "grad_norm": 1.8463906049728394, "learning_rate": 2.2175111608996657e-06, "loss": 1.4824, "step": 26104 }, { "epoch": 0.9348756423800741, "grad_norm": 1.8507603406906128, "learning_rate": 2.215082704887772e-06, "loss": 1.5644, "step": 26105 }, { "epoch": 0.9349114545096424, "grad_norm": 1.5813714265823364, "learning_rate": 2.212655564445798e-06, "loss": 1.2692, "step": 26106 }, { "epoch": 0.9349472666392107, "grad_norm": 1.5400943756103516, "learning_rate": 2.2102297396064176e-06, "loss": 1.3857, "step": 26107 }, { "epoch": 0.934983078768779, "grad_norm": 1.7825454473495483, "learning_rate": 2.20780523040226e-06, "loss": 1.1498, "step": 26108 }, { "epoch": 0.9350188908983472, "grad_norm": 1.7498632669448853, "learning_rate": 2.2053820368659215e-06, "loss": 1.4762, "step": 26109 }, { "epoch": 0.9350547030279156, "grad_norm": 1.3450285196304321, "learning_rate": 2.202960159030032e-06, "loss": 1.3703, "step": 26110 }, { "epoch": 0.9350905151574839, "grad_norm": 1.6363308429718018, "learning_rate": 2.200539596927165e-06, "loss": 1.3916, "step": 26111 }, { "epoch": 0.9351263272870521, "grad_norm": 1.3470873832702637, "learning_rate": 2.1981203505898827e-06, "loss": 1.4387, "step": 26112 }, { "epoch": 0.9351621394166204, "grad_norm": 1.7252334356307983, "learning_rate": 2.195702420050727e-06, "loss": 1.7303, "step": 26113 }, { "epoch": 0.9351979515461887, "grad_norm": 1.8090825080871582, "learning_rate": 2.19328580534226e-06, "loss": 1.1771, "step": 26114 }, { "epoch": 0.935233763675757, "grad_norm": 1.476894736289978, "learning_rate": 2.190870506496956e-06, "loss": 1.4577, "step": 26115 }, { "epoch": 0.9352695758053252, "grad_norm": 1.3571605682373047, "learning_rate": 2.188456523547322e-06, "loss": 1.389, "step": 26116 }, { "epoch": 0.9353053879348936, "grad_norm": 1.636551856994629, "learning_rate": 2.1860438565258433e-06, "loss": 1.1224, "step": 26117 }, { "epoch": 0.9353412000644619, "grad_norm": 1.9286612272262573, "learning_rate": 2.183632505464972e-06, "loss": 1.4867, "step": 26118 }, { "epoch": 0.9353770121940301, "grad_norm": 1.419366717338562, "learning_rate": 2.1812224703971597e-06, "loss": 1.399, "step": 26119 }, { "epoch": 0.9354128243235984, "grad_norm": 1.7759507894515991, "learning_rate": 2.1788137513548134e-06, "loss": 1.1488, "step": 26120 }, { "epoch": 0.9354486364531667, "grad_norm": 1.542161464691162, "learning_rate": 2.176406348370341e-06, "loss": 1.4903, "step": 26121 }, { "epoch": 0.9354844485827349, "grad_norm": 1.8545513153076172, "learning_rate": 2.174000261476139e-06, "loss": 1.3911, "step": 26122 }, { "epoch": 0.9355202607123032, "grad_norm": 1.7891281843185425, "learning_rate": 2.171595490704592e-06, "loss": 1.4406, "step": 26123 }, { "epoch": 0.9355560728418716, "grad_norm": 1.7108030319213867, "learning_rate": 2.1691920360880303e-06, "loss": 1.1197, "step": 26124 }, { "epoch": 0.9355918849714399, "grad_norm": 1.6328608989715576, "learning_rate": 2.166789897658794e-06, "loss": 1.5987, "step": 26125 }, { "epoch": 0.9356276971010081, "grad_norm": 1.4279072284698486, "learning_rate": 2.1643890754492136e-06, "loss": 1.3825, "step": 26126 }, { "epoch": 0.9356635092305764, "grad_norm": 1.4188889265060425, "learning_rate": 2.1619895694915624e-06, "loss": 1.826, "step": 26127 }, { "epoch": 0.9356993213601447, "grad_norm": 1.5467236042022705, "learning_rate": 2.159591379818149e-06, "loss": 1.4663, "step": 26128 }, { "epoch": 0.9357351334897129, "grad_norm": 1.9561902284622192, "learning_rate": 2.1571945064612243e-06, "loss": 1.3251, "step": 26129 }, { "epoch": 0.9357709456192812, "grad_norm": 1.4215346574783325, "learning_rate": 2.1547989494530517e-06, "loss": 1.3927, "step": 26130 }, { "epoch": 0.9358067577488496, "grad_norm": 1.427872896194458, "learning_rate": 2.1524047088258394e-06, "loss": 1.5345, "step": 26131 }, { "epoch": 0.9358425698784179, "grad_norm": 1.6102819442749023, "learning_rate": 2.1500117846118053e-06, "loss": 1.3043, "step": 26132 }, { "epoch": 0.9358783820079861, "grad_norm": 1.5477509498596191, "learning_rate": 2.147620176843157e-06, "loss": 1.0714, "step": 26133 }, { "epoch": 0.9359141941375544, "grad_norm": 1.82809317111969, "learning_rate": 2.145229885552047e-06, "loss": 1.4888, "step": 26134 }, { "epoch": 0.9359500062671227, "grad_norm": 2.383084535598755, "learning_rate": 2.142840910770638e-06, "loss": 1.1927, "step": 26135 }, { "epoch": 0.9359858183966909, "grad_norm": 2.7783987522125244, "learning_rate": 2.140453252531083e-06, "loss": 1.5051, "step": 26136 }, { "epoch": 0.9360216305262592, "grad_norm": 1.496717929840088, "learning_rate": 2.1380669108655105e-06, "loss": 1.25, "step": 26137 }, { "epoch": 0.9360574426558276, "grad_norm": 1.7770413160324097, "learning_rate": 2.135681885806007e-06, "loss": 1.5141, "step": 26138 }, { "epoch": 0.9360932547853958, "grad_norm": 1.5932000875473022, "learning_rate": 2.133298177384668e-06, "loss": 1.4161, "step": 26139 }, { "epoch": 0.9361290669149641, "grad_norm": 1.4183354377746582, "learning_rate": 2.1309157856335694e-06, "loss": 1.6704, "step": 26140 }, { "epoch": 0.9361648790445324, "grad_norm": 2.0836100578308105, "learning_rate": 2.128534710584751e-06, "loss": 1.1774, "step": 26141 }, { "epoch": 0.9362006911741007, "grad_norm": 1.5669517517089844, "learning_rate": 2.126154952270254e-06, "loss": 1.4349, "step": 26142 }, { "epoch": 0.9362365033036689, "grad_norm": 1.7547202110290527, "learning_rate": 2.1237765107220973e-06, "loss": 1.235, "step": 26143 }, { "epoch": 0.9362723154332372, "grad_norm": 1.7867295742034912, "learning_rate": 2.121399385972278e-06, "loss": 1.0921, "step": 26144 }, { "epoch": 0.9363081275628056, "grad_norm": 1.2874763011932373, "learning_rate": 2.11902357805277e-06, "loss": 1.058, "step": 26145 }, { "epoch": 0.9363439396923738, "grad_norm": 1.6540254354476929, "learning_rate": 2.116649086995537e-06, "loss": 1.5833, "step": 26146 }, { "epoch": 0.9363797518219421, "grad_norm": 1.5691769123077393, "learning_rate": 2.1142759128325306e-06, "loss": 1.2805, "step": 26147 }, { "epoch": 0.9364155639515104, "grad_norm": 2.247567653656006, "learning_rate": 2.1119040555956925e-06, "loss": 1.2188, "step": 26148 }, { "epoch": 0.9364513760810786, "grad_norm": 1.526218295097351, "learning_rate": 2.109533515316908e-06, "loss": 1.198, "step": 26149 }, { "epoch": 0.9364871882106469, "grad_norm": 1.5749701261520386, "learning_rate": 2.1071642920280855e-06, "loss": 1.3176, "step": 26150 }, { "epoch": 0.9365230003402152, "grad_norm": 1.655874490737915, "learning_rate": 2.1047963857610986e-06, "loss": 1.3957, "step": 26151 }, { "epoch": 0.9365588124697836, "grad_norm": 1.9014919996261597, "learning_rate": 2.102429796547789e-06, "loss": 1.6384, "step": 26152 }, { "epoch": 0.9365946245993518, "grad_norm": 1.5637357234954834, "learning_rate": 2.100064524420009e-06, "loss": 1.303, "step": 26153 }, { "epoch": 0.9366304367289201, "grad_norm": 2.0295560359954834, "learning_rate": 2.0977005694095774e-06, "loss": 1.3744, "step": 26154 }, { "epoch": 0.9366662488584884, "grad_norm": 2.1295666694641113, "learning_rate": 2.0953379315483134e-06, "loss": 1.2995, "step": 26155 }, { "epoch": 0.9367020609880566, "grad_norm": 1.3736835718154907, "learning_rate": 2.0929766108679803e-06, "loss": 1.4379, "step": 26156 }, { "epoch": 0.9367378731176249, "grad_norm": 1.850738763809204, "learning_rate": 2.0906166074003532e-06, "loss": 1.3111, "step": 26157 }, { "epoch": 0.9367736852471932, "grad_norm": 1.3870395421981812, "learning_rate": 2.0882579211771837e-06, "loss": 1.3912, "step": 26158 }, { "epoch": 0.9368094973767616, "grad_norm": 1.872261881828308, "learning_rate": 2.0859005522302245e-06, "loss": 1.5057, "step": 26159 }, { "epoch": 0.9368453095063298, "grad_norm": 1.5025333166122437, "learning_rate": 2.0835445005911503e-06, "loss": 1.2798, "step": 26160 }, { "epoch": 0.9368811216358981, "grad_norm": 1.5420435667037964, "learning_rate": 2.081189766291691e-06, "loss": 1.303, "step": 26161 }, { "epoch": 0.9369169337654664, "grad_norm": 1.7748241424560547, "learning_rate": 2.0788363493635333e-06, "loss": 1.4483, "step": 26162 }, { "epoch": 0.9369527458950346, "grad_norm": 1.7625503540039062, "learning_rate": 2.0764842498383063e-06, "loss": 1.348, "step": 26163 }, { "epoch": 0.9369885580246029, "grad_norm": 1.4400793313980103, "learning_rate": 2.074133467747663e-06, "loss": 1.6231, "step": 26164 }, { "epoch": 0.9370243701541712, "grad_norm": 1.3040224313735962, "learning_rate": 2.071784003123256e-06, "loss": 1.4208, "step": 26165 }, { "epoch": 0.9370601822837396, "grad_norm": 1.9658088684082031, "learning_rate": 2.069435855996671e-06, "loss": 1.5414, "step": 26166 }, { "epoch": 0.9370959944133078, "grad_norm": 1.4681401252746582, "learning_rate": 2.0670890263995047e-06, "loss": 1.1518, "step": 26167 }, { "epoch": 0.9371318065428761, "grad_norm": 1.5593922138214111, "learning_rate": 2.0647435143633322e-06, "loss": 1.3844, "step": 26168 }, { "epoch": 0.9371676186724444, "grad_norm": 1.4829758405685425, "learning_rate": 2.0623993199197055e-06, "loss": 1.4184, "step": 26169 }, { "epoch": 0.9372034308020126, "grad_norm": 1.5997012853622437, "learning_rate": 2.0600564431001668e-06, "loss": 1.2306, "step": 26170 }, { "epoch": 0.9372392429315809, "grad_norm": 1.61320960521698, "learning_rate": 2.057714883936235e-06, "loss": 1.3906, "step": 26171 }, { "epoch": 0.9372750550611492, "grad_norm": 2.0251548290252686, "learning_rate": 2.0553746424594065e-06, "loss": 1.2477, "step": 26172 }, { "epoch": 0.9373108671907175, "grad_norm": 1.9987709522247314, "learning_rate": 2.0530357187011907e-06, "loss": 1.3705, "step": 26173 }, { "epoch": 0.9373466793202858, "grad_norm": 1.7930504083633423, "learning_rate": 2.050698112693028e-06, "loss": 1.1294, "step": 26174 }, { "epoch": 0.9373824914498541, "grad_norm": 2.174814462661743, "learning_rate": 2.0483618244663714e-06, "loss": 1.3507, "step": 26175 }, { "epoch": 0.9374183035794224, "grad_norm": 2.3176262378692627, "learning_rate": 2.0460268540526518e-06, "loss": 1.405, "step": 26176 }, { "epoch": 0.9374541157089906, "grad_norm": 2.033170223236084, "learning_rate": 2.04369320148331e-06, "loss": 1.5684, "step": 26177 }, { "epoch": 0.9374899278385589, "grad_norm": 1.7996870279312134, "learning_rate": 2.041360866789721e-06, "loss": 1.1019, "step": 26178 }, { "epoch": 0.9375257399681272, "grad_norm": 2.1224141120910645, "learning_rate": 2.0390298500032377e-06, "loss": 1.1435, "step": 26179 }, { "epoch": 0.9375615520976955, "grad_norm": 1.8272570371627808, "learning_rate": 2.0367001511552685e-06, "loss": 1.5015, "step": 26180 }, { "epoch": 0.9375973642272638, "grad_norm": 1.5879367589950562, "learning_rate": 2.0343717702771325e-06, "loss": 1.3781, "step": 26181 }, { "epoch": 0.9376331763568321, "grad_norm": 1.2769107818603516, "learning_rate": 2.0320447074001492e-06, "loss": 1.6663, "step": 26182 }, { "epoch": 0.9376689884864003, "grad_norm": 1.972424864768982, "learning_rate": 2.0297189625556377e-06, "loss": 1.2255, "step": 26183 }, { "epoch": 0.9377048006159686, "grad_norm": 1.7590620517730713, "learning_rate": 2.027394535774896e-06, "loss": 1.394, "step": 26184 }, { "epoch": 0.9377406127455369, "grad_norm": 1.6972074508666992, "learning_rate": 2.0250714270891757e-06, "loss": 1.5186, "step": 26185 }, { "epoch": 0.9377764248751052, "grad_norm": 1.7677464485168457, "learning_rate": 2.0227496365297304e-06, "loss": 1.415, "step": 26186 }, { "epoch": 0.9378122370046735, "grad_norm": 1.1106623411178589, "learning_rate": 2.020429164127835e-06, "loss": 0.9381, "step": 26187 }, { "epoch": 0.9378480491342418, "grad_norm": 1.5314291715621948, "learning_rate": 2.0181100099146533e-06, "loss": 1.1579, "step": 26188 }, { "epoch": 0.9378838612638101, "grad_norm": 1.614284873008728, "learning_rate": 2.015792173921438e-06, "loss": 1.1576, "step": 26189 }, { "epoch": 0.9379196733933783, "grad_norm": 1.5244933366775513, "learning_rate": 2.01347565617932e-06, "loss": 1.3892, "step": 26190 }, { "epoch": 0.9379554855229466, "grad_norm": 1.5229711532592773, "learning_rate": 2.0111604567195185e-06, "loss": 1.249, "step": 26191 }, { "epoch": 0.9379912976525149, "grad_norm": 1.5737578868865967, "learning_rate": 2.008846575573142e-06, "loss": 1.0932, "step": 26192 }, { "epoch": 0.9380271097820831, "grad_norm": 1.5744316577911377, "learning_rate": 2.006534012771344e-06, "loss": 1.4432, "step": 26193 }, { "epoch": 0.9380629219116515, "grad_norm": 1.4843591451644897, "learning_rate": 2.004222768345221e-06, "loss": 1.482, "step": 26194 }, { "epoch": 0.9380987340412198, "grad_norm": 1.2316222190856934, "learning_rate": 2.0019128423258816e-06, "loss": 1.3807, "step": 26195 }, { "epoch": 0.9381345461707881, "grad_norm": 1.4942682981491089, "learning_rate": 1.999604234744401e-06, "loss": 1.4272, "step": 26196 }, { "epoch": 0.9381703583003563, "grad_norm": 2.2705743312835693, "learning_rate": 1.99729694563181e-06, "loss": 1.5428, "step": 26197 }, { "epoch": 0.9382061704299246, "grad_norm": 1.8797887563705444, "learning_rate": 1.9949909750192064e-06, "loss": 1.4058, "step": 26198 }, { "epoch": 0.9382419825594929, "grad_norm": 1.439017653465271, "learning_rate": 1.992686322937565e-06, "loss": 1.211, "step": 26199 }, { "epoch": 0.9382777946890611, "grad_norm": 1.3723461627960205, "learning_rate": 1.990382989417916e-06, "loss": 1.4947, "step": 26200 }, { "epoch": 0.9383136068186295, "grad_norm": 1.789353847503662, "learning_rate": 1.9880809744912244e-06, "loss": 1.4519, "step": 26201 }, { "epoch": 0.9383494189481978, "grad_norm": 1.4479377269744873, "learning_rate": 1.985780278188487e-06, "loss": 1.5526, "step": 26202 }, { "epoch": 0.9383852310777661, "grad_norm": 1.693623661994934, "learning_rate": 1.983480900540646e-06, "loss": 1.5865, "step": 26203 }, { "epoch": 0.9384210432073343, "grad_norm": 1.6015986204147339, "learning_rate": 1.981182841578644e-06, "loss": 1.5839, "step": 26204 }, { "epoch": 0.9384568553369026, "grad_norm": 2.0890917778015137, "learning_rate": 1.978886101333388e-06, "loss": 1.1135, "step": 26205 }, { "epoch": 0.9384926674664709, "grad_norm": 1.5381704568862915, "learning_rate": 1.9765906798357767e-06, "loss": 1.5057, "step": 26206 }, { "epoch": 0.9385284795960391, "grad_norm": 1.5179535150527954, "learning_rate": 1.9742965771167077e-06, "loss": 1.5197, "step": 26207 }, { "epoch": 0.9385642917256075, "grad_norm": 1.7586171627044678, "learning_rate": 1.972003793207011e-06, "loss": 1.5069, "step": 26208 }, { "epoch": 0.9386001038551758, "grad_norm": 1.4683053493499756, "learning_rate": 1.969712328137574e-06, "loss": 1.2305, "step": 26209 }, { "epoch": 0.938635915984744, "grad_norm": 1.755323886871338, "learning_rate": 1.967422181939205e-06, "loss": 1.4299, "step": 26210 }, { "epoch": 0.9386717281143123, "grad_norm": 1.5153175592422485, "learning_rate": 1.9651333546427232e-06, "loss": 1.5045, "step": 26211 }, { "epoch": 0.9387075402438806, "grad_norm": 1.4256856441497803, "learning_rate": 1.9628458462789044e-06, "loss": 1.1516, "step": 26212 }, { "epoch": 0.9387433523734489, "grad_norm": 1.3613686561584473, "learning_rate": 1.960559656878547e-06, "loss": 1.4651, "step": 26213 }, { "epoch": 0.9387791645030171, "grad_norm": 1.3513787984848022, "learning_rate": 1.9582747864723917e-06, "loss": 1.3124, "step": 26214 }, { "epoch": 0.9388149766325855, "grad_norm": 1.5042704343795776, "learning_rate": 1.9559912350911925e-06, "loss": 1.398, "step": 26215 }, { "epoch": 0.9388507887621538, "grad_norm": 1.5108270645141602, "learning_rate": 1.953709002765647e-06, "loss": 1.3385, "step": 26216 }, { "epoch": 0.938886600891722, "grad_norm": 1.5499955415725708, "learning_rate": 1.951428089526486e-06, "loss": 1.5028, "step": 26217 }, { "epoch": 0.9389224130212903, "grad_norm": 1.7364864349365234, "learning_rate": 1.949148495404396e-06, "loss": 1.306, "step": 26218 }, { "epoch": 0.9389582251508586, "grad_norm": 1.5313931703567505, "learning_rate": 1.9468702204300195e-06, "loss": 1.5107, "step": 26219 }, { "epoch": 0.9389940372804269, "grad_norm": 1.9169384241104126, "learning_rate": 1.9445932646340314e-06, "loss": 1.3873, "step": 26220 }, { "epoch": 0.9390298494099951, "grad_norm": 1.4949196577072144, "learning_rate": 1.9423176280470633e-06, "loss": 1.5275, "step": 26221 }, { "epoch": 0.9390656615395635, "grad_norm": 1.7438608407974243, "learning_rate": 1.940043310699724e-06, "loss": 1.3416, "step": 26222 }, { "epoch": 0.9391014736691318, "grad_norm": 2.022535562515259, "learning_rate": 1.937770312622611e-06, "loss": 1.5157, "step": 26223 }, { "epoch": 0.9391372857987, "grad_norm": 1.9582380056381226, "learning_rate": 1.9354986338463e-06, "loss": 1.2197, "step": 26224 }, { "epoch": 0.9391730979282683, "grad_norm": 1.4199358224868774, "learning_rate": 1.9332282744013774e-06, "loss": 1.5727, "step": 26225 }, { "epoch": 0.9392089100578366, "grad_norm": 1.7500290870666504, "learning_rate": 1.9309592343183636e-06, "loss": 1.5048, "step": 26226 }, { "epoch": 0.9392447221874048, "grad_norm": 1.1745381355285645, "learning_rate": 1.9286915136277894e-06, "loss": 1.3728, "step": 26227 }, { "epoch": 0.9392805343169731, "grad_norm": 1.6411765813827515, "learning_rate": 1.926425112360164e-06, "loss": 1.1032, "step": 26228 }, { "epoch": 0.9393163464465415, "grad_norm": 2.0992393493652344, "learning_rate": 1.924160030545996e-06, "loss": 1.5346, "step": 26229 }, { "epoch": 0.9393521585761098, "grad_norm": 1.2511874437332153, "learning_rate": 1.9218962682157395e-06, "loss": 1.4908, "step": 26230 }, { "epoch": 0.939387970705678, "grad_norm": 1.5121225118637085, "learning_rate": 1.919633825399858e-06, "loss": 1.4154, "step": 26231 }, { "epoch": 0.9394237828352463, "grad_norm": 1.8958724737167358, "learning_rate": 1.9173727021287947e-06, "loss": 1.4142, "step": 26232 }, { "epoch": 0.9394595949648146, "grad_norm": 1.8122069835662842, "learning_rate": 1.915112898432947e-06, "loss": 1.5462, "step": 26233 }, { "epoch": 0.9394954070943828, "grad_norm": 1.5994408130645752, "learning_rate": 1.9128544143427463e-06, "loss": 1.5741, "step": 26234 }, { "epoch": 0.9395312192239511, "grad_norm": 1.965086817741394, "learning_rate": 1.910597249888568e-06, "loss": 1.4839, "step": 26235 }, { "epoch": 0.9395670313535195, "grad_norm": 1.5724587440490723, "learning_rate": 1.9083414051007776e-06, "loss": 1.6046, "step": 26236 }, { "epoch": 0.9396028434830878, "grad_norm": 1.785568118095398, "learning_rate": 1.9060868800097164e-06, "loss": 1.3867, "step": 26237 }, { "epoch": 0.939638655612656, "grad_norm": 1.5689623355865479, "learning_rate": 1.9038336746457276e-06, "loss": 1.2745, "step": 26238 }, { "epoch": 0.9396744677422243, "grad_norm": 2.110036611557007, "learning_rate": 1.9015817890391308e-06, "loss": 1.2477, "step": 26239 }, { "epoch": 0.9397102798717926, "grad_norm": 2.069141149520874, "learning_rate": 1.8993312232202021e-06, "loss": 1.5739, "step": 26240 }, { "epoch": 0.9397460920013608, "grad_norm": 1.2748523950576782, "learning_rate": 1.8970819772192394e-06, "loss": 1.4898, "step": 26241 }, { "epoch": 0.9397819041309291, "grad_norm": 1.6000792980194092, "learning_rate": 1.8948340510664853e-06, "loss": 1.1472, "step": 26242 }, { "epoch": 0.9398177162604975, "grad_norm": 1.5243643522262573, "learning_rate": 1.8925874447922044e-06, "loss": 1.3172, "step": 26243 }, { "epoch": 0.9398535283900658, "grad_norm": 1.70756995677948, "learning_rate": 1.8903421584266056e-06, "loss": 1.4195, "step": 26244 }, { "epoch": 0.939889340519634, "grad_norm": 1.3219189643859863, "learning_rate": 1.8880981919998875e-06, "loss": 1.3242, "step": 26245 }, { "epoch": 0.9399251526492023, "grad_norm": 1.4109432697296143, "learning_rate": 1.8858555455422699e-06, "loss": 1.4621, "step": 26246 }, { "epoch": 0.9399609647787706, "grad_norm": 1.9342769384384155, "learning_rate": 1.8836142190839067e-06, "loss": 1.5312, "step": 26247 }, { "epoch": 0.9399967769083388, "grad_norm": 1.6864076852798462, "learning_rate": 1.8813742126549404e-06, "loss": 1.4521, "step": 26248 }, { "epoch": 0.9400325890379071, "grad_norm": 1.7248731851577759, "learning_rate": 1.879135526285525e-06, "loss": 1.3108, "step": 26249 }, { "epoch": 0.9400684011674755, "grad_norm": 1.3078269958496094, "learning_rate": 1.876898160005791e-06, "loss": 1.2327, "step": 26250 }, { "epoch": 0.9401042132970437, "grad_norm": 1.7912778854370117, "learning_rate": 1.8746621138458042e-06, "loss": 1.3538, "step": 26251 }, { "epoch": 0.940140025426612, "grad_norm": 1.9527921676635742, "learning_rate": 1.8724273878356624e-06, "loss": 1.23, "step": 26252 }, { "epoch": 0.9401758375561803, "grad_norm": 1.850545883178711, "learning_rate": 1.8701939820054414e-06, "loss": 1.5436, "step": 26253 }, { "epoch": 0.9402116496857486, "grad_norm": 1.3803719282150269, "learning_rate": 1.8679618963851952e-06, "loss": 1.543, "step": 26254 }, { "epoch": 0.9402474618153168, "grad_norm": 1.3144088983535767, "learning_rate": 1.8657311310049218e-06, "loss": 1.5007, "step": 26255 }, { "epoch": 0.9402832739448851, "grad_norm": 1.282601237297058, "learning_rate": 1.863501685894664e-06, "loss": 1.3551, "step": 26256 }, { "epoch": 0.9403190860744535, "grad_norm": 2.010690927505493, "learning_rate": 1.861273561084398e-06, "loss": 1.3937, "step": 26257 }, { "epoch": 0.9403548982040217, "grad_norm": 2.084581136703491, "learning_rate": 1.8590467566041104e-06, "loss": 1.7239, "step": 26258 }, { "epoch": 0.94039071033359, "grad_norm": 1.5342340469360352, "learning_rate": 1.8568212724837442e-06, "loss": 1.2138, "step": 26259 }, { "epoch": 0.9404265224631583, "grad_norm": 1.4099143743515015, "learning_rate": 1.8545971087532644e-06, "loss": 1.7193, "step": 26260 }, { "epoch": 0.9404623345927265, "grad_norm": 1.9763034582138062, "learning_rate": 1.8523742654425802e-06, "loss": 1.4201, "step": 26261 }, { "epoch": 0.9404981467222948, "grad_norm": 1.699539065361023, "learning_rate": 1.8501527425816012e-06, "loss": 1.6167, "step": 26262 }, { "epoch": 0.9405339588518631, "grad_norm": 1.9624556303024292, "learning_rate": 1.8479325402002034e-06, "loss": 1.5734, "step": 26263 }, { "epoch": 0.9405697709814315, "grad_norm": 2.5679054260253906, "learning_rate": 1.8457136583282741e-06, "loss": 1.3815, "step": 26264 }, { "epoch": 0.9406055831109997, "grad_norm": 1.602765440940857, "learning_rate": 1.8434960969956561e-06, "loss": 1.489, "step": 26265 }, { "epoch": 0.940641395240568, "grad_norm": 2.7537906169891357, "learning_rate": 1.8412798562321809e-06, "loss": 1.2926, "step": 26266 }, { "epoch": 0.9406772073701363, "grad_norm": 1.3616611957550049, "learning_rate": 1.8390649360676692e-06, "loss": 1.625, "step": 26267 }, { "epoch": 0.9407130194997045, "grad_norm": 1.3584762811660767, "learning_rate": 1.8368513365319306e-06, "loss": 1.11, "step": 26268 }, { "epoch": 0.9407488316292728, "grad_norm": 1.5779452323913574, "learning_rate": 1.83463905765473e-06, "loss": 1.6435, "step": 26269 }, { "epoch": 0.9407846437588411, "grad_norm": 1.533232569694519, "learning_rate": 1.8324280994658327e-06, "loss": 1.4304, "step": 26270 }, { "epoch": 0.9408204558884095, "grad_norm": 1.4318612813949585, "learning_rate": 1.8302184619949925e-06, "loss": 1.3363, "step": 26271 }, { "epoch": 0.9408562680179777, "grad_norm": 1.623886227607727, "learning_rate": 1.8280101452719412e-06, "loss": 1.2529, "step": 26272 }, { "epoch": 0.940892080147546, "grad_norm": 1.5533499717712402, "learning_rate": 1.825803149326366e-06, "loss": 1.488, "step": 26273 }, { "epoch": 0.9409278922771143, "grad_norm": 1.4572827816009521, "learning_rate": 1.8235974741879769e-06, "loss": 1.2201, "step": 26274 }, { "epoch": 0.9409637044066825, "grad_norm": 1.7187683582305908, "learning_rate": 1.8213931198864608e-06, "loss": 1.6117, "step": 26275 }, { "epoch": 0.9409995165362508, "grad_norm": 1.6530053615570068, "learning_rate": 1.8191900864514388e-06, "loss": 1.2012, "step": 26276 }, { "epoch": 0.9410353286658191, "grad_norm": 1.9581012725830078, "learning_rate": 1.816988373912587e-06, "loss": 1.4425, "step": 26277 }, { "epoch": 0.9410711407953875, "grad_norm": 1.6734392642974854, "learning_rate": 1.8147879822994928e-06, "loss": 1.3796, "step": 26278 }, { "epoch": 0.9411069529249557, "grad_norm": 1.4807915687561035, "learning_rate": 1.8125889116417883e-06, "loss": 1.314, "step": 26279 }, { "epoch": 0.941142765054524, "grad_norm": 2.1756036281585693, "learning_rate": 1.8103911619690384e-06, "loss": 1.6318, "step": 26280 }, { "epoch": 0.9411785771840923, "grad_norm": 1.8008071184158325, "learning_rate": 1.8081947333108195e-06, "loss": 1.4568, "step": 26281 }, { "epoch": 0.9412143893136605, "grad_norm": 1.7400346994400024, "learning_rate": 1.805999625696686e-06, "loss": 1.4965, "step": 26282 }, { "epoch": 0.9412502014432288, "grad_norm": 1.8043440580368042, "learning_rate": 1.8038058391561697e-06, "loss": 1.1718, "step": 26283 }, { "epoch": 0.9412860135727971, "grad_norm": 2.3016085624694824, "learning_rate": 1.8016133737187913e-06, "loss": 1.3647, "step": 26284 }, { "epoch": 0.9413218257023654, "grad_norm": 1.9413868188858032, "learning_rate": 1.799422229414016e-06, "loss": 1.1064, "step": 26285 }, { "epoch": 0.9413576378319337, "grad_norm": 1.3793493509292603, "learning_rate": 1.7972324062713652e-06, "loss": 1.5471, "step": 26286 }, { "epoch": 0.941393449961502, "grad_norm": 1.6569875478744507, "learning_rate": 1.7950439043202593e-06, "loss": 1.3871, "step": 26287 }, { "epoch": 0.9414292620910703, "grad_norm": 2.83836030960083, "learning_rate": 1.7928567235901861e-06, "loss": 1.6417, "step": 26288 }, { "epoch": 0.9414650742206385, "grad_norm": 1.4256877899169922, "learning_rate": 1.790670864110522e-06, "loss": 1.3258, "step": 26289 }, { "epoch": 0.9415008863502068, "grad_norm": 1.5324336290359497, "learning_rate": 1.7884863259107209e-06, "loss": 1.4885, "step": 26290 }, { "epoch": 0.9415366984797751, "grad_norm": 1.494587779045105, "learning_rate": 1.7863031090201377e-06, "loss": 1.5611, "step": 26291 }, { "epoch": 0.9415725106093434, "grad_norm": 1.7873804569244385, "learning_rate": 1.7841212134681705e-06, "loss": 1.4419, "step": 26292 }, { "epoch": 0.9416083227389117, "grad_norm": 1.5246162414550781, "learning_rate": 1.781940639284163e-06, "loss": 1.4035, "step": 26293 }, { "epoch": 0.94164413486848, "grad_norm": 2.191568613052368, "learning_rate": 1.7797613864974472e-06, "loss": 1.4957, "step": 26294 }, { "epoch": 0.9416799469980482, "grad_norm": 1.926516056060791, "learning_rate": 1.7775834551373548e-06, "loss": 0.9744, "step": 26295 }, { "epoch": 0.9417157591276165, "grad_norm": 2.023942470550537, "learning_rate": 1.775406845233163e-06, "loss": 1.1816, "step": 26296 }, { "epoch": 0.9417515712571848, "grad_norm": 1.6293940544128418, "learning_rate": 1.7732315568141811e-06, "loss": 1.2553, "step": 26297 }, { "epoch": 0.941787383386753, "grad_norm": 1.5346230268478394, "learning_rate": 1.7710575899096637e-06, "loss": 1.3784, "step": 26298 }, { "epoch": 0.9418231955163214, "grad_norm": 1.4494526386260986, "learning_rate": 1.7688849445488654e-06, "loss": 1.2135, "step": 26299 }, { "epoch": 0.9418590076458897, "grad_norm": 1.3800164461135864, "learning_rate": 1.7667136207609958e-06, "loss": 1.4168, "step": 26300 }, { "epoch": 0.941894819775458, "grad_norm": 1.7155061960220337, "learning_rate": 1.7645436185753095e-06, "loss": 1.5418, "step": 26301 }, { "epoch": 0.9419306319050262, "grad_norm": 1.9509527683258057, "learning_rate": 1.7623749380209609e-06, "loss": 1.2594, "step": 26302 }, { "epoch": 0.9419664440345945, "grad_norm": 1.7195106744766235, "learning_rate": 1.7602075791271377e-06, "loss": 1.451, "step": 26303 }, { "epoch": 0.9420022561641628, "grad_norm": 1.8029755353927612, "learning_rate": 1.7580415419229946e-06, "loss": 1.1763, "step": 26304 }, { "epoch": 0.942038068293731, "grad_norm": 1.6189782619476318, "learning_rate": 1.7558768264376856e-06, "loss": 1.4517, "step": 26305 }, { "epoch": 0.9420738804232994, "grad_norm": 1.5579745769500732, "learning_rate": 1.7537134327003324e-06, "loss": 1.4916, "step": 26306 }, { "epoch": 0.9421096925528677, "grad_norm": 2.151827573776245, "learning_rate": 1.7515513607400225e-06, "loss": 1.6583, "step": 26307 }, { "epoch": 0.942145504682436, "grad_norm": 1.8219877481460571, "learning_rate": 1.749390610585877e-06, "loss": 1.2741, "step": 26308 }, { "epoch": 0.9421813168120042, "grad_norm": 1.3775928020477295, "learning_rate": 1.7472311822669397e-06, "loss": 1.3416, "step": 26309 }, { "epoch": 0.9422171289415725, "grad_norm": 1.1605829000473022, "learning_rate": 1.7450730758122757e-06, "loss": 1.4166, "step": 26310 }, { "epoch": 0.9422529410711408, "grad_norm": 1.6979670524597168, "learning_rate": 1.7429162912508956e-06, "loss": 1.4178, "step": 26311 }, { "epoch": 0.942288753200709, "grad_norm": 1.6193804740905762, "learning_rate": 1.7407608286118427e-06, "loss": 1.5039, "step": 26312 }, { "epoch": 0.9423245653302774, "grad_norm": 2.2689478397369385, "learning_rate": 1.7386066879241159e-06, "loss": 1.6966, "step": 26313 }, { "epoch": 0.9423603774598457, "grad_norm": 1.6678130626678467, "learning_rate": 1.736453869216681e-06, "loss": 1.4604, "step": 26314 }, { "epoch": 0.942396189589414, "grad_norm": 1.554015040397644, "learning_rate": 1.7343023725185038e-06, "loss": 1.471, "step": 26315 }, { "epoch": 0.9424320017189822, "grad_norm": 1.548634648323059, "learning_rate": 1.7321521978585387e-06, "loss": 1.5321, "step": 26316 }, { "epoch": 0.9424678138485505, "grad_norm": 1.383250117301941, "learning_rate": 1.7300033452657184e-06, "loss": 1.3117, "step": 26317 }, { "epoch": 0.9425036259781188, "grad_norm": 1.8333631753921509, "learning_rate": 1.7278558147689306e-06, "loss": 1.4467, "step": 26318 }, { "epoch": 0.942539438107687, "grad_norm": 2.024153232574463, "learning_rate": 1.7257096063970856e-06, "loss": 1.4771, "step": 26319 }, { "epoch": 0.9425752502372554, "grad_norm": 2.0110878944396973, "learning_rate": 1.7235647201790605e-06, "loss": 1.9133, "step": 26320 }, { "epoch": 0.9426110623668237, "grad_norm": 2.2840211391448975, "learning_rate": 1.7214211561436987e-06, "loss": 1.576, "step": 26321 }, { "epoch": 0.942646874496392, "grad_norm": 1.5875118970870972, "learning_rate": 1.719278914319844e-06, "loss": 1.4262, "step": 26322 }, { "epoch": 0.9426826866259602, "grad_norm": 1.5386816263198853, "learning_rate": 1.7171379947363175e-06, "loss": 1.3028, "step": 26323 }, { "epoch": 0.9427184987555285, "grad_norm": 1.6080418825149536, "learning_rate": 1.7149983974219297e-06, "loss": 1.4332, "step": 26324 }, { "epoch": 0.9427543108850968, "grad_norm": 1.3982714414596558, "learning_rate": 1.7128601224054464e-06, "loss": 1.2564, "step": 26325 }, { "epoch": 0.942790123014665, "grad_norm": 1.6409122943878174, "learning_rate": 1.7107231697156557e-06, "loss": 1.3616, "step": 26326 }, { "epoch": 0.9428259351442334, "grad_norm": 1.7633566856384277, "learning_rate": 1.7085875393813123e-06, "loss": 1.3169, "step": 26327 }, { "epoch": 0.9428617472738017, "grad_norm": 1.4810881614685059, "learning_rate": 1.7064532314311266e-06, "loss": 1.3493, "step": 26328 }, { "epoch": 0.94289755940337, "grad_norm": 1.8232383728027344, "learning_rate": 1.70432024589382e-06, "loss": 1.5052, "step": 26329 }, { "epoch": 0.9429333715329382, "grad_norm": 2.7344441413879395, "learning_rate": 1.702188582798092e-06, "loss": 1.6025, "step": 26330 }, { "epoch": 0.9429691836625065, "grad_norm": 2.21616792678833, "learning_rate": 1.7000582421726308e-06, "loss": 1.5006, "step": 26331 }, { "epoch": 0.9430049957920748, "grad_norm": 1.2851320505142212, "learning_rate": 1.6979292240460799e-06, "loss": 1.2972, "step": 26332 }, { "epoch": 0.943040807921643, "grad_norm": 1.8838286399841309, "learning_rate": 1.695801528447094e-06, "loss": 1.398, "step": 26333 }, { "epoch": 0.9430766200512114, "grad_norm": 1.508453130722046, "learning_rate": 1.6936751554042951e-06, "loss": 1.4436, "step": 26334 }, { "epoch": 0.9431124321807797, "grad_norm": 1.5754175186157227, "learning_rate": 1.6915501049462934e-06, "loss": 1.2449, "step": 26335 }, { "epoch": 0.9431482443103479, "grad_norm": 1.53984797000885, "learning_rate": 1.6894263771016661e-06, "loss": 1.8434, "step": 26336 }, { "epoch": 0.9431840564399162, "grad_norm": 1.8990575075149536, "learning_rate": 1.6873039718990014e-06, "loss": 1.317, "step": 26337 }, { "epoch": 0.9432198685694845, "grad_norm": 1.4001402854919434, "learning_rate": 1.6851828893668543e-06, "loss": 1.2721, "step": 26338 }, { "epoch": 0.9432556806990527, "grad_norm": 1.814524531364441, "learning_rate": 1.6830631295337462e-06, "loss": 1.4002, "step": 26339 }, { "epoch": 0.943291492828621, "grad_norm": 1.9315727949142456, "learning_rate": 1.68094469242821e-06, "loss": 1.3913, "step": 26340 }, { "epoch": 0.9433273049581893, "grad_norm": 1.8088985681533813, "learning_rate": 1.6788275780787343e-06, "loss": 1.4572, "step": 26341 }, { "epoch": 0.9433631170877577, "grad_norm": 1.4374310970306396, "learning_rate": 1.6767117865138182e-06, "loss": 1.3947, "step": 26342 }, { "epoch": 0.9433989292173259, "grad_norm": 1.5694729089736938, "learning_rate": 1.6745973177619056e-06, "loss": 1.3992, "step": 26343 }, { "epoch": 0.9434347413468942, "grad_norm": 1.4492532014846802, "learning_rate": 1.6724841718514629e-06, "loss": 1.266, "step": 26344 }, { "epoch": 0.9434705534764625, "grad_norm": 1.6183035373687744, "learning_rate": 1.6703723488109112e-06, "loss": 1.5549, "step": 26345 }, { "epoch": 0.9435063656060307, "grad_norm": 1.5488311052322388, "learning_rate": 1.6682618486686619e-06, "loss": 1.3828, "step": 26346 }, { "epoch": 0.943542177735599, "grad_norm": 1.724896788597107, "learning_rate": 1.6661526714531029e-06, "loss": 1.1049, "step": 26347 }, { "epoch": 0.9435779898651673, "grad_norm": 1.9380396604537964, "learning_rate": 1.6640448171926226e-06, "loss": 1.3431, "step": 26348 }, { "epoch": 0.9436138019947357, "grad_norm": 2.2302026748657227, "learning_rate": 1.6619382859155873e-06, "loss": 1.6598, "step": 26349 }, { "epoch": 0.9436496141243039, "grad_norm": 2.1377158164978027, "learning_rate": 1.659833077650319e-06, "loss": 1.3672, "step": 26350 }, { "epoch": 0.9436854262538722, "grad_norm": 1.8395555019378662, "learning_rate": 1.6577291924251392e-06, "loss": 1.4068, "step": 26351 }, { "epoch": 0.9437212383834405, "grad_norm": 1.6605585813522339, "learning_rate": 1.6556266302683588e-06, "loss": 1.1427, "step": 26352 }, { "epoch": 0.9437570505130087, "grad_norm": 1.5564028024673462, "learning_rate": 1.6535253912082772e-06, "loss": 1.1415, "step": 26353 }, { "epoch": 0.943792862642577, "grad_norm": 1.9182465076446533, "learning_rate": 1.6514254752731494e-06, "loss": 1.3955, "step": 26354 }, { "epoch": 0.9438286747721453, "grad_norm": 1.4966611862182617, "learning_rate": 1.6493268824912312e-06, "loss": 1.3372, "step": 26355 }, { "epoch": 0.9438644869017137, "grad_norm": 1.8412766456604004, "learning_rate": 1.647229612890766e-06, "loss": 1.5312, "step": 26356 }, { "epoch": 0.9439002990312819, "grad_norm": 1.7466552257537842, "learning_rate": 1.6451336664999539e-06, "loss": 1.268, "step": 26357 }, { "epoch": 0.9439361111608502, "grad_norm": 1.5257654190063477, "learning_rate": 1.6430390433469945e-06, "loss": 1.4011, "step": 26358 }, { "epoch": 0.9439719232904185, "grad_norm": 1.8344718217849731, "learning_rate": 1.640945743460065e-06, "loss": 1.5296, "step": 26359 }, { "epoch": 0.9440077354199867, "grad_norm": 1.9049286842346191, "learning_rate": 1.6388537668673542e-06, "loss": 1.1641, "step": 26360 }, { "epoch": 0.944043547549555, "grad_norm": 1.3105257749557495, "learning_rate": 1.636763113596984e-06, "loss": 1.3407, "step": 26361 }, { "epoch": 0.9440793596791233, "grad_norm": 2.12705659866333, "learning_rate": 1.6346737836770875e-06, "loss": 1.4898, "step": 26362 }, { "epoch": 0.9441151718086916, "grad_norm": 1.5113617181777954, "learning_rate": 1.6325857771357756e-06, "loss": 1.5729, "step": 26363 }, { "epoch": 0.9441509839382599, "grad_norm": 1.867129921913147, "learning_rate": 1.6304990940011255e-06, "loss": 1.3769, "step": 26364 }, { "epoch": 0.9441867960678282, "grad_norm": 1.3271534442901611, "learning_rate": 1.6284137343012263e-06, "loss": 1.2326, "step": 26365 }, { "epoch": 0.9442226081973965, "grad_norm": 1.6503703594207764, "learning_rate": 1.6263296980641328e-06, "loss": 1.2799, "step": 26366 }, { "epoch": 0.9442584203269647, "grad_norm": 1.6071670055389404, "learning_rate": 1.62424698531789e-06, "loss": 1.2021, "step": 26367 }, { "epoch": 0.944294232456533, "grad_norm": 1.499289870262146, "learning_rate": 1.6221655960904968e-06, "loss": 1.0335, "step": 26368 }, { "epoch": 0.9443300445861013, "grad_norm": 1.6527241468429565, "learning_rate": 1.620085530409965e-06, "loss": 1.3493, "step": 26369 }, { "epoch": 0.9443658567156696, "grad_norm": 1.5437915325164795, "learning_rate": 1.6180067883042937e-06, "loss": 1.468, "step": 26370 }, { "epoch": 0.9444016688452379, "grad_norm": 2.0119853019714355, "learning_rate": 1.6159293698014278e-06, "loss": 1.4788, "step": 26371 }, { "epoch": 0.9444374809748062, "grad_norm": 1.936726689338684, "learning_rate": 1.6138532749293335e-06, "loss": 1.4597, "step": 26372 }, { "epoch": 0.9444732931043744, "grad_norm": 1.780268907546997, "learning_rate": 1.6117785037159216e-06, "loss": 1.1247, "step": 26373 }, { "epoch": 0.9445091052339427, "grad_norm": 1.8385742902755737, "learning_rate": 1.6097050561891369e-06, "loss": 1.5127, "step": 26374 }, { "epoch": 0.944544917363511, "grad_norm": 1.4780793190002441, "learning_rate": 1.6076329323768347e-06, "loss": 1.4763, "step": 26375 }, { "epoch": 0.9445807294930793, "grad_norm": 1.8092213869094849, "learning_rate": 1.605562132306937e-06, "loss": 1.3985, "step": 26376 }, { "epoch": 0.9446165416226476, "grad_norm": 1.405903935432434, "learning_rate": 1.6034926560072549e-06, "loss": 1.1074, "step": 26377 }, { "epoch": 0.9446523537522159, "grad_norm": 1.8108632564544678, "learning_rate": 1.6014245035056775e-06, "loss": 1.3274, "step": 26378 }, { "epoch": 0.9446881658817842, "grad_norm": 1.407546877861023, "learning_rate": 1.5993576748300043e-06, "loss": 1.4492, "step": 26379 }, { "epoch": 0.9447239780113524, "grad_norm": 1.6472501754760742, "learning_rate": 1.5972921700080357e-06, "loss": 1.2341, "step": 26380 }, { "epoch": 0.9447597901409207, "grad_norm": 1.5353707075119019, "learning_rate": 1.5952279890675826e-06, "loss": 1.1901, "step": 26381 }, { "epoch": 0.944795602270489, "grad_norm": 1.5727671384811401, "learning_rate": 1.5931651320364006e-06, "loss": 1.5107, "step": 26382 }, { "epoch": 0.9448314144000572, "grad_norm": 1.323904037475586, "learning_rate": 1.5911035989422562e-06, "loss": 1.3362, "step": 26383 }, { "epoch": 0.9448672265296256, "grad_norm": 1.4275002479553223, "learning_rate": 1.5890433898128498e-06, "loss": 1.4968, "step": 26384 }, { "epoch": 0.9449030386591939, "grad_norm": 1.5640442371368408, "learning_rate": 1.5869845046759369e-06, "loss": 1.4972, "step": 26385 }, { "epoch": 0.9449388507887622, "grad_norm": 2.0282034873962402, "learning_rate": 1.5849269435592061e-06, "loss": 1.6468, "step": 26386 }, { "epoch": 0.9449746629183304, "grad_norm": 1.7705632448196411, "learning_rate": 1.5828707064903359e-06, "loss": 1.3092, "step": 26387 }, { "epoch": 0.9450104750478987, "grad_norm": 1.7389479875564575, "learning_rate": 1.5808157934969813e-06, "loss": 1.0817, "step": 26388 }, { "epoch": 0.945046287177467, "grad_norm": 1.5240728855133057, "learning_rate": 1.5787622046068207e-06, "loss": 1.3589, "step": 26389 }, { "epoch": 0.9450820993070352, "grad_norm": 1.9323331117630005, "learning_rate": 1.576709939847454e-06, "loss": 1.2756, "step": 26390 }, { "epoch": 0.9451179114366036, "grad_norm": 1.5090250968933105, "learning_rate": 1.574658999246481e-06, "loss": 1.1489, "step": 26391 }, { "epoch": 0.9451537235661719, "grad_norm": 2.539374828338623, "learning_rate": 1.5726093828315248e-06, "loss": 1.2725, "step": 26392 }, { "epoch": 0.9451895356957402, "grad_norm": 1.6672533750534058, "learning_rate": 1.5705610906301404e-06, "loss": 1.317, "step": 26393 }, { "epoch": 0.9452253478253084, "grad_norm": 1.4400562047958374, "learning_rate": 1.5685141226699064e-06, "loss": 1.4577, "step": 26394 }, { "epoch": 0.9452611599548767, "grad_norm": 1.512329339981079, "learning_rate": 1.5664684789783224e-06, "loss": 1.2943, "step": 26395 }, { "epoch": 0.945296972084445, "grad_norm": 1.447803258895874, "learning_rate": 1.5644241595829557e-06, "loss": 1.2944, "step": 26396 }, { "epoch": 0.9453327842140132, "grad_norm": 1.6319823265075684, "learning_rate": 1.562381164511284e-06, "loss": 1.2474, "step": 26397 }, { "epoch": 0.9453685963435816, "grad_norm": 1.9061074256896973, "learning_rate": 1.5603394937907967e-06, "loss": 1.4807, "step": 26398 }, { "epoch": 0.9454044084731499, "grad_norm": 1.5398566722869873, "learning_rate": 1.5582991474489607e-06, "loss": 1.526, "step": 26399 }, { "epoch": 0.9454402206027182, "grad_norm": 4.880867958068848, "learning_rate": 1.5562601255132314e-06, "loss": 1.3848, "step": 26400 }, { "epoch": 0.9454760327322864, "grad_norm": 1.713074803352356, "learning_rate": 1.554222428011043e-06, "loss": 1.3903, "step": 26401 }, { "epoch": 0.9455118448618547, "grad_norm": 1.401732325553894, "learning_rate": 1.5521860549698063e-06, "loss": 1.6819, "step": 26402 }, { "epoch": 0.945547656991423, "grad_norm": 1.8747241497039795, "learning_rate": 1.550151006416911e-06, "loss": 1.5883, "step": 26403 }, { "epoch": 0.9455834691209912, "grad_norm": 1.228621244430542, "learning_rate": 1.5481172823797463e-06, "loss": 1.3525, "step": 26404 }, { "epoch": 0.9456192812505596, "grad_norm": 1.479820728302002, "learning_rate": 1.5460848828856677e-06, "loss": 1.6433, "step": 26405 }, { "epoch": 0.9456550933801279, "grad_norm": 1.6731551885604858, "learning_rate": 1.5440538079620204e-06, "loss": 1.3101, "step": 26406 }, { "epoch": 0.9456909055096961, "grad_norm": 1.7374619245529175, "learning_rate": 1.5420240576361378e-06, "loss": 1.4818, "step": 26407 }, { "epoch": 0.9457267176392644, "grad_norm": 1.6798261404037476, "learning_rate": 1.5399956319353092e-06, "loss": 1.2353, "step": 26408 }, { "epoch": 0.9457625297688327, "grad_norm": 1.8133715391159058, "learning_rate": 1.5379685308868464e-06, "loss": 1.3315, "step": 26409 }, { "epoch": 0.945798341898401, "grad_norm": 2.04004168510437, "learning_rate": 1.535942754517994e-06, "loss": 1.154, "step": 26410 }, { "epoch": 0.9458341540279692, "grad_norm": 1.9576140642166138, "learning_rate": 1.53391830285603e-06, "loss": 1.4195, "step": 26411 }, { "epoch": 0.9458699661575376, "grad_norm": 1.7399269342422485, "learning_rate": 1.5318951759281885e-06, "loss": 1.6082, "step": 26412 }, { "epoch": 0.9459057782871059, "grad_norm": 1.564070224761963, "learning_rate": 1.529873373761681e-06, "loss": 1.7338, "step": 26413 }, { "epoch": 0.9459415904166741, "grad_norm": 1.5939946174621582, "learning_rate": 1.527852896383708e-06, "loss": 1.4318, "step": 26414 }, { "epoch": 0.9459774025462424, "grad_norm": 1.428228497505188, "learning_rate": 1.5258337438214587e-06, "loss": 1.4115, "step": 26415 }, { "epoch": 0.9460132146758107, "grad_norm": 1.6301560401916504, "learning_rate": 1.5238159161020893e-06, "loss": 1.5172, "step": 26416 }, { "epoch": 0.946049026805379, "grad_norm": 1.5005348920822144, "learning_rate": 1.5217994132527448e-06, "loss": 1.5653, "step": 26417 }, { "epoch": 0.9460848389349472, "grad_norm": 1.9517136812210083, "learning_rate": 1.5197842353005698e-06, "loss": 1.4366, "step": 26418 }, { "epoch": 0.9461206510645156, "grad_norm": 1.6520404815673828, "learning_rate": 1.5177703822726652e-06, "loss": 1.4942, "step": 26419 }, { "epoch": 0.9461564631940839, "grad_norm": 1.7810132503509521, "learning_rate": 1.5157578541961315e-06, "loss": 1.1101, "step": 26420 }, { "epoch": 0.9461922753236521, "grad_norm": 1.829916000366211, "learning_rate": 1.5137466510980357e-06, "loss": 1.4551, "step": 26421 }, { "epoch": 0.9462280874532204, "grad_norm": 1.324633002281189, "learning_rate": 1.5117367730054343e-06, "loss": 1.3078, "step": 26422 }, { "epoch": 0.9462638995827887, "grad_norm": 1.974655270576477, "learning_rate": 1.5097282199453943e-06, "loss": 1.5415, "step": 26423 }, { "epoch": 0.9462997117123569, "grad_norm": 1.4947298765182495, "learning_rate": 1.5077209919449053e-06, "loss": 1.2912, "step": 26424 }, { "epoch": 0.9463355238419252, "grad_norm": 1.5060384273529053, "learning_rate": 1.505715089030979e-06, "loss": 1.293, "step": 26425 }, { "epoch": 0.9463713359714936, "grad_norm": 2.223021984100342, "learning_rate": 1.503710511230616e-06, "loss": 1.344, "step": 26426 }, { "epoch": 0.9464071481010619, "grad_norm": 1.7746235132217407, "learning_rate": 1.5017072585707725e-06, "loss": 1.4105, "step": 26427 }, { "epoch": 0.9464429602306301, "grad_norm": 1.6269015073776245, "learning_rate": 1.4997053310784047e-06, "loss": 1.2152, "step": 26428 }, { "epoch": 0.9464787723601984, "grad_norm": 1.9540683031082153, "learning_rate": 1.497704728780447e-06, "loss": 1.4217, "step": 26429 }, { "epoch": 0.9465145844897667, "grad_norm": 1.675334095954895, "learning_rate": 1.4957054517038106e-06, "loss": 1.4094, "step": 26430 }, { "epoch": 0.9465503966193349, "grad_norm": 2.8871023654937744, "learning_rate": 1.4937074998753965e-06, "loss": 1.3651, "step": 26431 }, { "epoch": 0.9465862087489032, "grad_norm": 1.911722183227539, "learning_rate": 1.491710873322083e-06, "loss": 1.441, "step": 26432 }, { "epoch": 0.9466220208784716, "grad_norm": 1.7608087062835693, "learning_rate": 1.489715572070738e-06, "loss": 1.421, "step": 26433 }, { "epoch": 0.9466578330080399, "grad_norm": 1.8853119611740112, "learning_rate": 1.4877215961482062e-06, "loss": 1.2649, "step": 26434 }, { "epoch": 0.9466936451376081, "grad_norm": 1.8366423845291138, "learning_rate": 1.4857289455812883e-06, "loss": 1.5063, "step": 26435 }, { "epoch": 0.9467294572671764, "grad_norm": 1.7807245254516602, "learning_rate": 1.48373762039683e-06, "loss": 1.3553, "step": 26436 }, { "epoch": 0.9467652693967447, "grad_norm": 1.6845966577529907, "learning_rate": 1.4817476206216096e-06, "loss": 1.2403, "step": 26437 }, { "epoch": 0.9468010815263129, "grad_norm": 1.5173600912094116, "learning_rate": 1.4797589462823836e-06, "loss": 1.5802, "step": 26438 }, { "epoch": 0.9468368936558812, "grad_norm": 1.406801462173462, "learning_rate": 1.4777715974059192e-06, "loss": 1.5145, "step": 26439 }, { "epoch": 0.9468727057854496, "grad_norm": 1.5290340185165405, "learning_rate": 1.4757855740189508e-06, "loss": 1.3885, "step": 26440 }, { "epoch": 0.9469085179150178, "grad_norm": 2.0321245193481445, "learning_rate": 1.4738008761482125e-06, "loss": 1.6672, "step": 26441 }, { "epoch": 0.9469443300445861, "grad_norm": 1.7161322832107544, "learning_rate": 1.471817503820383e-06, "loss": 1.3366, "step": 26442 }, { "epoch": 0.9469801421741544, "grad_norm": 1.8721601963043213, "learning_rate": 1.469835457062163e-06, "loss": 1.3743, "step": 26443 }, { "epoch": 0.9470159543037227, "grad_norm": 1.5677088499069214, "learning_rate": 1.4678547359002092e-06, "loss": 1.556, "step": 26444 }, { "epoch": 0.9470517664332909, "grad_norm": 1.6195982694625854, "learning_rate": 1.465875340361178e-06, "loss": 1.4419, "step": 26445 }, { "epoch": 0.9470875785628592, "grad_norm": 1.714670181274414, "learning_rate": 1.4638972704716814e-06, "loss": 1.7476, "step": 26446 }, { "epoch": 0.9471233906924276, "grad_norm": 1.6740520000457764, "learning_rate": 1.4619205262583536e-06, "loss": 1.4975, "step": 26447 }, { "epoch": 0.9471592028219958, "grad_norm": 1.2522761821746826, "learning_rate": 1.4599451077477844e-06, "loss": 1.4487, "step": 26448 }, { "epoch": 0.9471950149515641, "grad_norm": 1.4357094764709473, "learning_rate": 1.4579710149665416e-06, "loss": 1.2878, "step": 26449 }, { "epoch": 0.9472308270811324, "grad_norm": 1.617078423500061, "learning_rate": 1.4559982479411927e-06, "loss": 1.4942, "step": 26450 }, { "epoch": 0.9472666392107006, "grad_norm": 1.5373728275299072, "learning_rate": 1.4540268066982722e-06, "loss": 1.2622, "step": 26451 }, { "epoch": 0.9473024513402689, "grad_norm": 2.110975980758667, "learning_rate": 1.452056691264303e-06, "loss": 1.2574, "step": 26452 }, { "epoch": 0.9473382634698372, "grad_norm": 2.1490426063537598, "learning_rate": 1.4500879016657865e-06, "loss": 1.5997, "step": 26453 }, { "epoch": 0.9473740755994056, "grad_norm": 1.6299991607666016, "learning_rate": 1.4481204379292234e-06, "loss": 1.5127, "step": 26454 }, { "epoch": 0.9474098877289738, "grad_norm": 1.6052266359329224, "learning_rate": 1.4461543000810929e-06, "loss": 1.6573, "step": 26455 }, { "epoch": 0.9474456998585421, "grad_norm": 2.261275291442871, "learning_rate": 1.4441894881478069e-06, "loss": 1.5065, "step": 26456 }, { "epoch": 0.9474815119881104, "grad_norm": 1.8761277198791504, "learning_rate": 1.4422260021558331e-06, "loss": 1.6339, "step": 26457 }, { "epoch": 0.9475173241176786, "grad_norm": 1.7935103178024292, "learning_rate": 1.440263842131573e-06, "loss": 1.5145, "step": 26458 }, { "epoch": 0.9475531362472469, "grad_norm": 1.3724159002304077, "learning_rate": 1.4383030081014493e-06, "loss": 1.3055, "step": 26459 }, { "epoch": 0.9475889483768152, "grad_norm": 1.9284459352493286, "learning_rate": 1.436343500091808e-06, "loss": 1.5151, "step": 26460 }, { "epoch": 0.9476247605063836, "grad_norm": 1.7961905002593994, "learning_rate": 1.4343853181290168e-06, "loss": 1.5256, "step": 26461 }, { "epoch": 0.9476605726359518, "grad_norm": 1.7151559591293335, "learning_rate": 1.4324284622394547e-06, "loss": 1.4779, "step": 26462 }, { "epoch": 0.9476963847655201, "grad_norm": 1.4853241443634033, "learning_rate": 1.4304729324494115e-06, "loss": 1.4664, "step": 26463 }, { "epoch": 0.9477321968950884, "grad_norm": 1.5965665578842163, "learning_rate": 1.4285187287851997e-06, "loss": 1.3899, "step": 26464 }, { "epoch": 0.9477680090246566, "grad_norm": 1.4473627805709839, "learning_rate": 1.4265658512731316e-06, "loss": 1.5785, "step": 26465 }, { "epoch": 0.9478038211542249, "grad_norm": 1.9590723514556885, "learning_rate": 1.4246142999394751e-06, "loss": 1.564, "step": 26466 }, { "epoch": 0.9478396332837932, "grad_norm": 1.9028202295303345, "learning_rate": 1.4226640748104757e-06, "loss": 1.5236, "step": 26467 }, { "epoch": 0.9478754454133616, "grad_norm": 1.640310525894165, "learning_rate": 1.4207151759123683e-06, "loss": 1.5118, "step": 26468 }, { "epoch": 0.9479112575429298, "grad_norm": 1.495152473449707, "learning_rate": 1.418767603271387e-06, "loss": 1.4463, "step": 26469 }, { "epoch": 0.9479470696724981, "grad_norm": 1.3607423305511475, "learning_rate": 1.4168213569137223e-06, "loss": 1.2448, "step": 26470 }, { "epoch": 0.9479828818020664, "grad_norm": 1.2964245080947876, "learning_rate": 1.4148764368655754e-06, "loss": 1.4987, "step": 26471 }, { "epoch": 0.9480186939316346, "grad_norm": 1.689813494682312, "learning_rate": 1.4129328431530807e-06, "loss": 1.2008, "step": 26472 }, { "epoch": 0.9480545060612029, "grad_norm": 1.8330782651901245, "learning_rate": 1.4109905758024177e-06, "loss": 1.8116, "step": 26473 }, { "epoch": 0.9480903181907712, "grad_norm": 1.5751451253890991, "learning_rate": 1.4090496348397097e-06, "loss": 1.2216, "step": 26474 }, { "epoch": 0.9481261303203395, "grad_norm": 2.155590057373047, "learning_rate": 1.407110020291058e-06, "loss": 1.3441, "step": 26475 }, { "epoch": 0.9481619424499078, "grad_norm": 2.1611201763153076, "learning_rate": 1.4051717321825643e-06, "loss": 1.453, "step": 26476 }, { "epoch": 0.9481977545794761, "grad_norm": 1.6963903903961182, "learning_rate": 1.403234770540307e-06, "loss": 1.1672, "step": 26477 }, { "epoch": 0.9482335667090444, "grad_norm": 1.572092056274414, "learning_rate": 1.4012991353903549e-06, "loss": 1.6028, "step": 26478 }, { "epoch": 0.9482693788386126, "grad_norm": 1.452835202217102, "learning_rate": 1.3993648267587312e-06, "loss": 1.4143, "step": 26479 }, { "epoch": 0.9483051909681809, "grad_norm": 1.6740903854370117, "learning_rate": 1.3974318446714706e-06, "loss": 1.61, "step": 26480 }, { "epoch": 0.9483410030977492, "grad_norm": 1.7781686782836914, "learning_rate": 1.395500189154575e-06, "loss": 1.4476, "step": 26481 }, { "epoch": 0.9483768152273175, "grad_norm": 1.7080374956130981, "learning_rate": 1.3935698602340452e-06, "loss": 1.3767, "step": 26482 }, { "epoch": 0.9484126273568858, "grad_norm": 1.3993390798568726, "learning_rate": 1.3916408579358164e-06, "loss": 1.5493, "step": 26483 }, { "epoch": 0.9484484394864541, "grad_norm": 1.3815793991088867, "learning_rate": 1.3897131822858789e-06, "loss": 1.4565, "step": 26484 }, { "epoch": 0.9484842516160223, "grad_norm": 2.051628589630127, "learning_rate": 1.3877868333101562e-06, "loss": 1.5791, "step": 26485 }, { "epoch": 0.9485200637455906, "grad_norm": 1.3823293447494507, "learning_rate": 1.385861811034561e-06, "loss": 1.2821, "step": 26486 }, { "epoch": 0.9485558758751589, "grad_norm": 2.022139310836792, "learning_rate": 1.383938115484984e-06, "loss": 1.4049, "step": 26487 }, { "epoch": 0.9485916880047272, "grad_norm": 1.9774534702301025, "learning_rate": 1.3820157466873152e-06, "loss": 1.4567, "step": 26488 }, { "epoch": 0.9486275001342955, "grad_norm": 1.6379338502883911, "learning_rate": 1.3800947046674228e-06, "loss": 1.175, "step": 26489 }, { "epoch": 0.9486633122638638, "grad_norm": 1.9727866649627686, "learning_rate": 1.3781749894511308e-06, "loss": 1.3028, "step": 26490 }, { "epoch": 0.9486991243934321, "grad_norm": 1.7060546875, "learning_rate": 1.3762566010642962e-06, "loss": 1.3837, "step": 26491 }, { "epoch": 0.9487349365230003, "grad_norm": 1.596717119216919, "learning_rate": 1.3743395395326985e-06, "loss": 1.2626, "step": 26492 }, { "epoch": 0.9487707486525686, "grad_norm": 1.3802907466888428, "learning_rate": 1.3724238048821615e-06, "loss": 1.1592, "step": 26493 }, { "epoch": 0.9488065607821369, "grad_norm": 1.8134254217147827, "learning_rate": 1.370509397138431e-06, "loss": 1.674, "step": 26494 }, { "epoch": 0.9488423729117051, "grad_norm": 1.4546838998794556, "learning_rate": 1.3685963163272752e-06, "loss": 1.401, "step": 26495 }, { "epoch": 0.9488781850412735, "grad_norm": 1.6962960958480835, "learning_rate": 1.3666845624744406e-06, "loss": 1.3322, "step": 26496 }, { "epoch": 0.9489139971708418, "grad_norm": 1.3423304557800293, "learning_rate": 1.3647741356056287e-06, "loss": 1.2208, "step": 26497 }, { "epoch": 0.9489498093004101, "grad_norm": 1.656837821006775, "learning_rate": 1.3628650357465522e-06, "loss": 1.3385, "step": 26498 }, { "epoch": 0.9489856214299783, "grad_norm": 1.3497300148010254, "learning_rate": 1.3609572629228906e-06, "loss": 1.2634, "step": 26499 }, { "epoch": 0.9490214335595466, "grad_norm": 2.2240593433380127, "learning_rate": 1.3590508171603233e-06, "loss": 1.6153, "step": 26500 }, { "epoch": 0.9490572456891149, "grad_norm": 1.696808934211731, "learning_rate": 1.3571456984844743e-06, "loss": 1.33, "step": 26501 }, { "epoch": 0.9490930578186831, "grad_norm": 1.5843878984451294, "learning_rate": 1.3552419069210009e-06, "loss": 1.2396, "step": 26502 }, { "epoch": 0.9491288699482515, "grad_norm": 1.4548217058181763, "learning_rate": 1.3533394424954937e-06, "loss": 1.2981, "step": 26503 }, { "epoch": 0.9491646820778198, "grad_norm": 1.916221022605896, "learning_rate": 1.3514383052335766e-06, "loss": 1.3667, "step": 26504 }, { "epoch": 0.9492004942073881, "grad_norm": 1.5517075061798096, "learning_rate": 1.3495384951607958e-06, "loss": 1.2612, "step": 26505 }, { "epoch": 0.9492363063369563, "grad_norm": 3.5113589763641357, "learning_rate": 1.3476400123027312e-06, "loss": 1.6406, "step": 26506 }, { "epoch": 0.9492721184665246, "grad_norm": 1.6516262292861938, "learning_rate": 1.3457428566849173e-06, "loss": 1.6534, "step": 26507 }, { "epoch": 0.9493079305960929, "grad_norm": 1.575160264968872, "learning_rate": 1.3438470283328785e-06, "loss": 1.4022, "step": 26508 }, { "epoch": 0.9493437427256611, "grad_norm": 1.442672610282898, "learning_rate": 1.3419525272721168e-06, "loss": 1.4947, "step": 26509 }, { "epoch": 0.9493795548552295, "grad_norm": 1.832108736038208, "learning_rate": 1.3400593535281224e-06, "loss": 1.4931, "step": 26510 }, { "epoch": 0.9494153669847978, "grad_norm": 1.702636957168579, "learning_rate": 1.3381675071263755e-06, "loss": 1.564, "step": 26511 }, { "epoch": 0.949451179114366, "grad_norm": 1.6425777673721313, "learning_rate": 1.3362769880923221e-06, "loss": 1.2737, "step": 26512 }, { "epoch": 0.9494869912439343, "grad_norm": 1.3201589584350586, "learning_rate": 1.3343877964513863e-06, "loss": 1.3811, "step": 26513 }, { "epoch": 0.9495228033735026, "grad_norm": 2.1531147956848145, "learning_rate": 1.3324999322290033e-06, "loss": 1.2665, "step": 26514 }, { "epoch": 0.9495586155030709, "grad_norm": 1.7345833778381348, "learning_rate": 1.330613395450553e-06, "loss": 1.6657, "step": 26515 }, { "epoch": 0.9495944276326391, "grad_norm": 1.5731521844863892, "learning_rate": 1.3287281861414258e-06, "loss": 1.4453, "step": 26516 }, { "epoch": 0.9496302397622075, "grad_norm": 1.4536832571029663, "learning_rate": 1.3268443043269796e-06, "loss": 1.5805, "step": 26517 }, { "epoch": 0.9496660518917758, "grad_norm": 1.422829031944275, "learning_rate": 1.3249617500325718e-06, "loss": 1.7652, "step": 26518 }, { "epoch": 0.949701864021344, "grad_norm": 1.5982391834259033, "learning_rate": 1.3230805232835153e-06, "loss": 1.4507, "step": 26519 }, { "epoch": 0.9497376761509123, "grad_norm": 1.574022650718689, "learning_rate": 1.3212006241051345e-06, "loss": 1.5129, "step": 26520 }, { "epoch": 0.9497734882804806, "grad_norm": 1.3596420288085938, "learning_rate": 1.319322052522709e-06, "loss": 1.2358, "step": 26521 }, { "epoch": 0.9498093004100489, "grad_norm": 1.462693214416504, "learning_rate": 1.3174448085615187e-06, "loss": 1.4235, "step": 26522 }, { "epoch": 0.9498451125396171, "grad_norm": 1.5312095880508423, "learning_rate": 1.3155688922468101e-06, "loss": 1.4207, "step": 26523 }, { "epoch": 0.9498809246691855, "grad_norm": 1.4311823844909668, "learning_rate": 1.3136943036038297e-06, "loss": 1.3391, "step": 26524 }, { "epoch": 0.9499167367987538, "grad_norm": 1.4623321294784546, "learning_rate": 1.3118210426578015e-06, "loss": 1.2909, "step": 26525 }, { "epoch": 0.949952548928322, "grad_norm": 1.7718008756637573, "learning_rate": 1.3099491094339279e-06, "loss": 1.3087, "step": 26526 }, { "epoch": 0.9499883610578903, "grad_norm": 1.7378346920013428, "learning_rate": 1.3080785039573773e-06, "loss": 1.4943, "step": 26527 }, { "epoch": 0.9500241731874586, "grad_norm": 1.7058058977127075, "learning_rate": 1.3062092262533189e-06, "loss": 1.4128, "step": 26528 }, { "epoch": 0.9500599853170268, "grad_norm": 1.367444396018982, "learning_rate": 1.304341276346932e-06, "loss": 1.0962, "step": 26529 }, { "epoch": 0.9500957974465951, "grad_norm": 1.446695327758789, "learning_rate": 1.3024746542633082e-06, "loss": 1.2731, "step": 26530 }, { "epoch": 0.9501316095761635, "grad_norm": 1.3863316774368286, "learning_rate": 1.3006093600275825e-06, "loss": 1.5057, "step": 26531 }, { "epoch": 0.9501674217057318, "grad_norm": 2.1868419647216797, "learning_rate": 1.2987453936648575e-06, "loss": 1.3829, "step": 26532 }, { "epoch": 0.9502032338353, "grad_norm": 1.467094898223877, "learning_rate": 1.2968827552001793e-06, "loss": 1.5164, "step": 26533 }, { "epoch": 0.9502390459648683, "grad_norm": 1.3066911697387695, "learning_rate": 1.2950214446586284e-06, "loss": 1.3691, "step": 26534 }, { "epoch": 0.9502748580944366, "grad_norm": 1.5992457866668701, "learning_rate": 1.2931614620652511e-06, "loss": 1.4246, "step": 26535 }, { "epoch": 0.9503106702240048, "grad_norm": 1.3781858682632446, "learning_rate": 1.2913028074450607e-06, "loss": 1.4019, "step": 26536 }, { "epoch": 0.9503464823535731, "grad_norm": 1.9242557287216187, "learning_rate": 1.2894454808230593e-06, "loss": 1.3886, "step": 26537 }, { "epoch": 0.9503822944831415, "grad_norm": 1.6685802936553955, "learning_rate": 1.2875894822242496e-06, "loss": 1.3555, "step": 26538 }, { "epoch": 0.9504181066127098, "grad_norm": 1.6586769819259644, "learning_rate": 1.2857348116736002e-06, "loss": 1.3652, "step": 26539 }, { "epoch": 0.950453918742278, "grad_norm": 1.9376729726791382, "learning_rate": 1.2838814691960355e-06, "loss": 1.5063, "step": 26540 }, { "epoch": 0.9504897308718463, "grad_norm": 1.4433695077896118, "learning_rate": 1.2820294548165246e-06, "loss": 1.2929, "step": 26541 }, { "epoch": 0.9505255430014146, "grad_norm": 1.3399428129196167, "learning_rate": 1.2801787685599698e-06, "loss": 1.3715, "step": 26542 }, { "epoch": 0.9505613551309828, "grad_norm": 1.332889437675476, "learning_rate": 1.2783294104512734e-06, "loss": 1.4932, "step": 26543 }, { "epoch": 0.9505971672605511, "grad_norm": 1.9041041135787964, "learning_rate": 1.2764813805153041e-06, "loss": 1.6289, "step": 26544 }, { "epoch": 0.9506329793901195, "grad_norm": 1.5188629627227783, "learning_rate": 1.2746346787769425e-06, "loss": 1.08, "step": 26545 }, { "epoch": 0.9506687915196878, "grad_norm": 1.580101490020752, "learning_rate": 1.272789305261013e-06, "loss": 1.5524, "step": 26546 }, { "epoch": 0.950704603649256, "grad_norm": 1.4802852869033813, "learning_rate": 1.2709452599923731e-06, "loss": 1.4638, "step": 26547 }, { "epoch": 0.9507404157788243, "grad_norm": 3.1193196773529053, "learning_rate": 1.2691025429958037e-06, "loss": 1.2083, "step": 26548 }, { "epoch": 0.9507762279083926, "grad_norm": 1.8259837627410889, "learning_rate": 1.2672611542960954e-06, "loss": 1.3745, "step": 26549 }, { "epoch": 0.9508120400379608, "grad_norm": 1.8224256038665771, "learning_rate": 1.2654210939180511e-06, "loss": 1.4162, "step": 26550 }, { "epoch": 0.9508478521675291, "grad_norm": 2.090353488922119, "learning_rate": 1.2635823618863951e-06, "loss": 1.4225, "step": 26551 }, { "epoch": 0.9508836642970975, "grad_norm": 1.8558623790740967, "learning_rate": 1.2617449582258744e-06, "loss": 1.6497, "step": 26552 }, { "epoch": 0.9509194764266657, "grad_norm": 1.5263234376907349, "learning_rate": 1.2599088829612249e-06, "loss": 1.4847, "step": 26553 }, { "epoch": 0.950955288556234, "grad_norm": 1.3935396671295166, "learning_rate": 1.2580741361171267e-06, "loss": 1.5371, "step": 26554 }, { "epoch": 0.9509911006858023, "grad_norm": 1.8329626321792603, "learning_rate": 1.2562407177182712e-06, "loss": 1.3644, "step": 26555 }, { "epoch": 0.9510269128153706, "grad_norm": 1.6709609031677246, "learning_rate": 1.2544086277893386e-06, "loss": 1.6079, "step": 26556 }, { "epoch": 0.9510627249449388, "grad_norm": 1.656237244606018, "learning_rate": 1.2525778663549537e-06, "loss": 1.2923, "step": 26557 }, { "epoch": 0.9510985370745071, "grad_norm": 1.7023342847824097, "learning_rate": 1.2507484334397634e-06, "loss": 1.4957, "step": 26558 }, { "epoch": 0.9511343492040755, "grad_norm": 1.3861737251281738, "learning_rate": 1.2489203290683703e-06, "loss": 1.3171, "step": 26559 }, { "epoch": 0.9511701613336437, "grad_norm": 1.4072065353393555, "learning_rate": 1.2470935532653772e-06, "loss": 1.5584, "step": 26560 }, { "epoch": 0.951205973463212, "grad_norm": 1.6545718908309937, "learning_rate": 1.2452681060553639e-06, "loss": 1.6098, "step": 26561 }, { "epoch": 0.9512417855927803, "grad_norm": 1.359649658203125, "learning_rate": 1.243443987462878e-06, "loss": 1.4323, "step": 26562 }, { "epoch": 0.9512775977223485, "grad_norm": 1.870990514755249, "learning_rate": 1.2416211975124658e-06, "loss": 1.4793, "step": 26563 }, { "epoch": 0.9513134098519168, "grad_norm": 1.6119709014892578, "learning_rate": 1.2397997362286528e-06, "loss": 1.1674, "step": 26564 }, { "epoch": 0.9513492219814851, "grad_norm": 1.4036765098571777, "learning_rate": 1.2379796036359526e-06, "loss": 1.4654, "step": 26565 }, { "epoch": 0.9513850341110535, "grad_norm": 1.458703875541687, "learning_rate": 1.2361607997588343e-06, "loss": 1.538, "step": 26566 }, { "epoch": 0.9514208462406217, "grad_norm": 1.934006690979004, "learning_rate": 1.2343433246217673e-06, "loss": 1.5755, "step": 26567 }, { "epoch": 0.95145665837019, "grad_norm": 1.8799941539764404, "learning_rate": 1.232527178249232e-06, "loss": 1.5967, "step": 26568 }, { "epoch": 0.9514924704997583, "grad_norm": 2.1603283882141113, "learning_rate": 1.2307123606656312e-06, "loss": 1.2299, "step": 26569 }, { "epoch": 0.9515282826293265, "grad_norm": 2.093066692352295, "learning_rate": 1.2288988718953897e-06, "loss": 1.441, "step": 26570 }, { "epoch": 0.9515640947588948, "grad_norm": 2.1063077449798584, "learning_rate": 1.2270867119629103e-06, "loss": 1.376, "step": 26571 }, { "epoch": 0.9515999068884631, "grad_norm": 1.4114335775375366, "learning_rate": 1.2252758808925736e-06, "loss": 1.3026, "step": 26572 }, { "epoch": 0.9516357190180315, "grad_norm": 1.5860474109649658, "learning_rate": 1.2234663787087375e-06, "loss": 1.3799, "step": 26573 }, { "epoch": 0.9516715311475997, "grad_norm": 1.4082696437835693, "learning_rate": 1.2216582054357495e-06, "loss": 1.3476, "step": 26574 }, { "epoch": 0.951707343277168, "grad_norm": 1.7118854522705078, "learning_rate": 1.2198513610979346e-06, "loss": 1.3482, "step": 26575 }, { "epoch": 0.9517431554067363, "grad_norm": 2.112502336502075, "learning_rate": 1.2180458457196064e-06, "loss": 1.2543, "step": 26576 }, { "epoch": 0.9517789675363045, "grad_norm": 1.6754275560379028, "learning_rate": 1.2162416593250569e-06, "loss": 1.5346, "step": 26577 }, { "epoch": 0.9518147796658728, "grad_norm": 1.9799768924713135, "learning_rate": 1.2144388019385333e-06, "loss": 1.2265, "step": 26578 }, { "epoch": 0.9518505917954411, "grad_norm": 1.3142305612564087, "learning_rate": 1.2126372735843272e-06, "loss": 1.4418, "step": 26579 }, { "epoch": 0.9518864039250094, "grad_norm": 1.5378390550613403, "learning_rate": 1.2108370742866526e-06, "loss": 1.7047, "step": 26580 }, { "epoch": 0.9519222160545777, "grad_norm": 1.4842941761016846, "learning_rate": 1.2090382040697456e-06, "loss": 1.3786, "step": 26581 }, { "epoch": 0.951958028184146, "grad_norm": 1.9399491548538208, "learning_rate": 1.2072406629577871e-06, "loss": 1.6141, "step": 26582 }, { "epoch": 0.9519938403137143, "grad_norm": 2.0363988876342773, "learning_rate": 1.2054444509749906e-06, "loss": 1.5161, "step": 26583 }, { "epoch": 0.9520296524432825, "grad_norm": 1.677929162979126, "learning_rate": 1.203649568145493e-06, "loss": 1.4207, "step": 26584 }, { "epoch": 0.9520654645728508, "grad_norm": 1.5547820329666138, "learning_rate": 1.201856014493441e-06, "loss": 1.4561, "step": 26585 }, { "epoch": 0.9521012767024191, "grad_norm": 1.590218186378479, "learning_rate": 1.2000637900429934e-06, "loss": 1.3997, "step": 26586 }, { "epoch": 0.9521370888319874, "grad_norm": 1.4837863445281982, "learning_rate": 1.1982728948182308e-06, "loss": 1.3099, "step": 26587 }, { "epoch": 0.9521729009615557, "grad_norm": 1.7251883745193481, "learning_rate": 1.1964833288432674e-06, "loss": 1.3323, "step": 26588 }, { "epoch": 0.952208713091124, "grad_norm": 2.328374147415161, "learning_rate": 1.194695092142173e-06, "loss": 1.6759, "step": 26589 }, { "epoch": 0.9522445252206923, "grad_norm": 1.6122950315475464, "learning_rate": 1.1929081847390056e-06, "loss": 1.2681, "step": 26590 }, { "epoch": 0.9522803373502605, "grad_norm": 1.4836642742156982, "learning_rate": 1.191122606657813e-06, "loss": 1.2356, "step": 26591 }, { "epoch": 0.9523161494798288, "grad_norm": 1.837382435798645, "learning_rate": 1.1893383579226091e-06, "loss": 1.5641, "step": 26592 }, { "epoch": 0.9523519616093971, "grad_norm": 2.236088275909424, "learning_rate": 1.1875554385573972e-06, "loss": 1.538, "step": 26593 }, { "epoch": 0.9523877737389654, "grad_norm": 1.5058979988098145, "learning_rate": 1.185773848586158e-06, "loss": 1.551, "step": 26594 }, { "epoch": 0.9524235858685337, "grad_norm": 2.073533773422241, "learning_rate": 1.1839935880328946e-06, "loss": 1.7001, "step": 26595 }, { "epoch": 0.952459397998102, "grad_norm": 2.2686805725097656, "learning_rate": 1.1822146569215097e-06, "loss": 1.5851, "step": 26596 }, { "epoch": 0.9524952101276702, "grad_norm": 1.4711614847183228, "learning_rate": 1.1804370552759735e-06, "loss": 1.4615, "step": 26597 }, { "epoch": 0.9525310222572385, "grad_norm": 1.4939398765563965, "learning_rate": 1.178660783120189e-06, "loss": 1.4739, "step": 26598 }, { "epoch": 0.9525668343868068, "grad_norm": 1.7867990732192993, "learning_rate": 1.176885840478048e-06, "loss": 1.6565, "step": 26599 }, { "epoch": 0.952602646516375, "grad_norm": 1.488075613975525, "learning_rate": 1.1751122273734316e-06, "loss": 1.131, "step": 26600 }, { "epoch": 0.9526384586459434, "grad_norm": 2.2243800163269043, "learning_rate": 1.1733399438302206e-06, "loss": 1.2857, "step": 26601 }, { "epoch": 0.9526742707755117, "grad_norm": 1.2374006509780884, "learning_rate": 1.1715689898722404e-06, "loss": 1.6162, "step": 26602 }, { "epoch": 0.95271008290508, "grad_norm": 1.3920713663101196, "learning_rate": 1.1697993655233164e-06, "loss": 1.394, "step": 26603 }, { "epoch": 0.9527458950346482, "grad_norm": 1.5225958824157715, "learning_rate": 1.1680310708072518e-06, "loss": 1.46, "step": 26604 }, { "epoch": 0.9527817071642165, "grad_norm": 1.5914231538772583, "learning_rate": 1.1662641057478497e-06, "loss": 1.4585, "step": 26605 }, { "epoch": 0.9528175192937848, "grad_norm": 1.5339897871017456, "learning_rate": 1.1644984703688799e-06, "loss": 1.4752, "step": 26606 }, { "epoch": 0.952853331423353, "grad_norm": 1.8416153192520142, "learning_rate": 1.1627341646941015e-06, "loss": 1.706, "step": 26607 }, { "epoch": 0.9528891435529214, "grad_norm": 1.4199974536895752, "learning_rate": 1.1609711887472286e-06, "loss": 1.3469, "step": 26608 }, { "epoch": 0.9529249556824897, "grad_norm": 1.518681287765503, "learning_rate": 1.1592095425520088e-06, "loss": 1.392, "step": 26609 }, { "epoch": 0.952960767812058, "grad_norm": 1.9634442329406738, "learning_rate": 1.1574492261321236e-06, "loss": 1.1609, "step": 26610 }, { "epoch": 0.9529965799416262, "grad_norm": 1.7711156606674194, "learning_rate": 1.1556902395112645e-06, "loss": 1.3154, "step": 26611 }, { "epoch": 0.9530323920711945, "grad_norm": 1.8715670108795166, "learning_rate": 1.1539325827130799e-06, "loss": 1.1835, "step": 26612 }, { "epoch": 0.9530682042007628, "grad_norm": 1.6497482061386108, "learning_rate": 1.1521762557612502e-06, "loss": 1.1409, "step": 26613 }, { "epoch": 0.953104016330331, "grad_norm": 1.8769314289093018, "learning_rate": 1.1504212586793683e-06, "loss": 1.4843, "step": 26614 }, { "epoch": 0.9531398284598994, "grad_norm": 1.9313020706176758, "learning_rate": 1.1486675914910705e-06, "loss": 1.6296, "step": 26615 }, { "epoch": 0.9531756405894677, "grad_norm": 1.3665918111801147, "learning_rate": 1.1469152542199379e-06, "loss": 1.3802, "step": 26616 }, { "epoch": 0.953211452719036, "grad_norm": 1.4282692670822144, "learning_rate": 1.1451642468895518e-06, "loss": 1.2054, "step": 26617 }, { "epoch": 0.9532472648486042, "grad_norm": 1.5573898553848267, "learning_rate": 1.14341456952346e-06, "loss": 1.4843, "step": 26618 }, { "epoch": 0.9532830769781725, "grad_norm": 1.994338870048523, "learning_rate": 1.1416662221452211e-06, "loss": 1.3083, "step": 26619 }, { "epoch": 0.9533188891077408, "grad_norm": 1.591489315032959, "learning_rate": 1.139919204778339e-06, "loss": 1.3245, "step": 26620 }, { "epoch": 0.953354701237309, "grad_norm": 1.2546374797821045, "learning_rate": 1.1381735174463283e-06, "loss": 0.9723, "step": 26621 }, { "epoch": 0.9533905133668774, "grad_norm": 1.652453064918518, "learning_rate": 1.1364291601726585e-06, "loss": 1.4873, "step": 26622 }, { "epoch": 0.9534263254964457, "grad_norm": 1.4358993768692017, "learning_rate": 1.1346861329808112e-06, "loss": 1.5944, "step": 26623 }, { "epoch": 0.953462137626014, "grad_norm": 1.5629358291625977, "learning_rate": 1.1329444358942454e-06, "loss": 1.2247, "step": 26624 }, { "epoch": 0.9534979497555822, "grad_norm": 1.5762356519699097, "learning_rate": 1.1312040689363757e-06, "loss": 1.4513, "step": 26625 }, { "epoch": 0.9535337618851505, "grad_norm": 1.2871313095092773, "learning_rate": 1.1294650321306277e-06, "loss": 1.2398, "step": 26626 }, { "epoch": 0.9535695740147188, "grad_norm": 1.5916223526000977, "learning_rate": 1.127727325500394e-06, "loss": 1.5777, "step": 26627 }, { "epoch": 0.953605386144287, "grad_norm": 1.6760125160217285, "learning_rate": 1.1259909490690556e-06, "loss": 1.4228, "step": 26628 }, { "epoch": 0.9536411982738554, "grad_norm": 1.6149671077728271, "learning_rate": 1.1242559028599609e-06, "loss": 1.4672, "step": 26629 }, { "epoch": 0.9536770104034237, "grad_norm": 1.6717442274093628, "learning_rate": 1.1225221868964686e-06, "loss": 1.2505, "step": 26630 }, { "epoch": 0.9537128225329919, "grad_norm": 1.4332804679870605, "learning_rate": 1.1207898012018936e-06, "loss": 1.3423, "step": 26631 }, { "epoch": 0.9537486346625602, "grad_norm": 1.3053473234176636, "learning_rate": 1.1190587457995506e-06, "loss": 1.7562, "step": 26632 }, { "epoch": 0.9537844467921285, "grad_norm": 1.356052279472351, "learning_rate": 1.1173290207127207e-06, "loss": 1.47, "step": 26633 }, { "epoch": 0.9538202589216968, "grad_norm": 1.9330263137817383, "learning_rate": 1.1156006259646856e-06, "loss": 1.6352, "step": 26634 }, { "epoch": 0.953856071051265, "grad_norm": 2.3237826824188232, "learning_rate": 1.1138735615786933e-06, "loss": 1.331, "step": 26635 }, { "epoch": 0.9538918831808334, "grad_norm": 1.5061534643173218, "learning_rate": 1.1121478275779696e-06, "loss": 1.2172, "step": 26636 }, { "epoch": 0.9539276953104017, "grad_norm": 1.64857816696167, "learning_rate": 1.1104234239857402e-06, "loss": 1.6107, "step": 26637 }, { "epoch": 0.9539635074399699, "grad_norm": 1.9496674537658691, "learning_rate": 1.1087003508252202e-06, "loss": 1.3248, "step": 26638 }, { "epoch": 0.9539993195695382, "grad_norm": 1.5698601007461548, "learning_rate": 1.1069786081195687e-06, "loss": 1.4504, "step": 26639 }, { "epoch": 0.9540351316991065, "grad_norm": 1.6455700397491455, "learning_rate": 1.105258195891945e-06, "loss": 1.368, "step": 26640 }, { "epoch": 0.9540709438286747, "grad_norm": 1.2808388471603394, "learning_rate": 1.1035391141655195e-06, "loss": 1.5479, "step": 26641 }, { "epoch": 0.954106755958243, "grad_norm": 2.0161030292510986, "learning_rate": 1.1018213629634178e-06, "loss": 1.6358, "step": 26642 }, { "epoch": 0.9541425680878114, "grad_norm": 1.532594919204712, "learning_rate": 1.1001049423087217e-06, "loss": 1.046, "step": 26643 }, { "epoch": 0.9541783802173797, "grad_norm": 1.7464717626571655, "learning_rate": 1.098389852224546e-06, "loss": 1.1881, "step": 26644 }, { "epoch": 0.9542141923469479, "grad_norm": 1.4636871814727783, "learning_rate": 1.0966760927339726e-06, "loss": 1.4925, "step": 26645 }, { "epoch": 0.9542500044765162, "grad_norm": 1.6371227502822876, "learning_rate": 1.094963663860027e-06, "loss": 1.5365, "step": 26646 }, { "epoch": 0.9542858166060845, "grad_norm": 1.4008408784866333, "learning_rate": 1.0932525656257796e-06, "loss": 1.4636, "step": 26647 }, { "epoch": 0.9543216287356527, "grad_norm": 1.474956750869751, "learning_rate": 1.0915427980542348e-06, "loss": 1.5885, "step": 26648 }, { "epoch": 0.954357440865221, "grad_norm": 1.310798168182373, "learning_rate": 1.089834361168407e-06, "loss": 1.1127, "step": 26649 }, { "epoch": 0.9543932529947894, "grad_norm": 1.807181715965271, "learning_rate": 1.088127254991267e-06, "loss": 1.4589, "step": 26650 }, { "epoch": 0.9544290651243577, "grad_norm": 1.9590734243392944, "learning_rate": 1.086421479545785e-06, "loss": 1.5383, "step": 26651 }, { "epoch": 0.9544648772539259, "grad_norm": 1.3808904886245728, "learning_rate": 1.0847170348549096e-06, "loss": 1.3616, "step": 26652 }, { "epoch": 0.9545006893834942, "grad_norm": 2.0818872451782227, "learning_rate": 1.0830139209415779e-06, "loss": 1.6911, "step": 26653 }, { "epoch": 0.9545365015130625, "grad_norm": 1.6958940029144287, "learning_rate": 1.081312137828716e-06, "loss": 1.8023, "step": 26654 }, { "epoch": 0.9545723136426307, "grad_norm": 1.6296740770339966, "learning_rate": 1.0796116855391724e-06, "loss": 1.0263, "step": 26655 }, { "epoch": 0.954608125772199, "grad_norm": 1.9934402704238892, "learning_rate": 1.0779125640958843e-06, "loss": 1.5194, "step": 26656 }, { "epoch": 0.9546439379017674, "grad_norm": 1.6104828119277954, "learning_rate": 1.0762147735216665e-06, "loss": 1.5336, "step": 26657 }, { "epoch": 0.9546797500313356, "grad_norm": 1.7793383598327637, "learning_rate": 1.0745183138393788e-06, "loss": 1.5556, "step": 26658 }, { "epoch": 0.9547155621609039, "grad_norm": 1.319137454032898, "learning_rate": 1.0728231850718363e-06, "loss": 1.6508, "step": 26659 }, { "epoch": 0.9547513742904722, "grad_norm": 1.5470972061157227, "learning_rate": 1.071129387241865e-06, "loss": 1.492, "step": 26660 }, { "epoch": 0.9547871864200405, "grad_norm": 1.630874752998352, "learning_rate": 1.0694369203722354e-06, "loss": 1.7498, "step": 26661 }, { "epoch": 0.9548229985496087, "grad_norm": 2.3035964965820312, "learning_rate": 1.0677457844857186e-06, "loss": 1.5807, "step": 26662 }, { "epoch": 0.954858810679177, "grad_norm": 1.6051185131072998, "learning_rate": 1.0660559796050739e-06, "loss": 1.4429, "step": 26663 }, { "epoch": 0.9548946228087454, "grad_norm": 2.025474786758423, "learning_rate": 1.0643675057530166e-06, "loss": 1.2662, "step": 26664 }, { "epoch": 0.9549304349383136, "grad_norm": 1.6414517164230347, "learning_rate": 1.0626803629522951e-06, "loss": 0.994, "step": 26665 }, { "epoch": 0.9549662470678819, "grad_norm": 1.5560110807418823, "learning_rate": 1.0609945512255692e-06, "loss": 1.2765, "step": 26666 }, { "epoch": 0.9550020591974502, "grad_norm": 1.2948863506317139, "learning_rate": 1.0593100705955538e-06, "loss": 1.4118, "step": 26667 }, { "epoch": 0.9550378713270185, "grad_norm": 1.3893499374389648, "learning_rate": 1.0576269210848867e-06, "loss": 1.5091, "step": 26668 }, { "epoch": 0.9550736834565867, "grad_norm": 1.5306798219680786, "learning_rate": 1.055945102716227e-06, "loss": 1.2156, "step": 26669 }, { "epoch": 0.955109495586155, "grad_norm": 1.7268040180206299, "learning_rate": 1.0542646155122015e-06, "loss": 1.7405, "step": 26670 }, { "epoch": 0.9551453077157234, "grad_norm": 2.0746474266052246, "learning_rate": 1.0525854594954143e-06, "loss": 1.3419, "step": 26671 }, { "epoch": 0.9551811198452916, "grad_norm": 1.7448663711547852, "learning_rate": 1.0509076346884583e-06, "loss": 1.827, "step": 26672 }, { "epoch": 0.9552169319748599, "grad_norm": 1.811957597732544, "learning_rate": 1.0492311411138934e-06, "loss": 1.6032, "step": 26673 }, { "epoch": 0.9552527441044282, "grad_norm": 2.3352155685424805, "learning_rate": 1.0475559787943012e-06, "loss": 1.2959, "step": 26674 }, { "epoch": 0.9552885562339964, "grad_norm": 1.8025147914886475, "learning_rate": 1.0458821477521974e-06, "loss": 1.5484, "step": 26675 }, { "epoch": 0.9553243683635647, "grad_norm": 1.6638069152832031, "learning_rate": 1.0442096480101082e-06, "loss": 1.1597, "step": 26676 }, { "epoch": 0.955360180493133, "grad_norm": 1.7823617458343506, "learning_rate": 1.042538479590527e-06, "loss": 1.7342, "step": 26677 }, { "epoch": 0.9553959926227014, "grad_norm": 1.538569450378418, "learning_rate": 1.0408686425159574e-06, "loss": 1.3907, "step": 26678 }, { "epoch": 0.9554318047522696, "grad_norm": 1.5950886011123657, "learning_rate": 1.0392001368088377e-06, "loss": 1.4772, "step": 26679 }, { "epoch": 0.9554676168818379, "grad_norm": 2.025176525115967, "learning_rate": 1.0375329624916386e-06, "loss": 1.4865, "step": 26680 }, { "epoch": 0.9555034290114062, "grad_norm": 1.6462210416793823, "learning_rate": 1.0358671195867865e-06, "loss": 1.2826, "step": 26681 }, { "epoch": 0.9555392411409744, "grad_norm": 1.741531491279602, "learning_rate": 1.0342026081166745e-06, "loss": 1.3633, "step": 26682 }, { "epoch": 0.9555750532705427, "grad_norm": 1.3028637170791626, "learning_rate": 1.0325394281037293e-06, "loss": 1.3505, "step": 26683 }, { "epoch": 0.955610865400111, "grad_norm": 1.8471804857254028, "learning_rate": 1.0308775795702775e-06, "loss": 1.2296, "step": 26684 }, { "epoch": 0.9556466775296794, "grad_norm": 1.38747239112854, "learning_rate": 1.0292170625387342e-06, "loss": 1.4824, "step": 26685 }, { "epoch": 0.9556824896592476, "grad_norm": 1.3638286590576172, "learning_rate": 1.0275578770313933e-06, "loss": 1.3719, "step": 26686 }, { "epoch": 0.9557183017888159, "grad_norm": 1.4321017265319824, "learning_rate": 1.025900023070614e-06, "loss": 1.4655, "step": 26687 }, { "epoch": 0.9557541139183842, "grad_norm": 1.3580139875411987, "learning_rate": 1.0242435006786677e-06, "loss": 1.5321, "step": 26688 }, { "epoch": 0.9557899260479524, "grad_norm": 1.8781472444534302, "learning_rate": 1.0225883098778588e-06, "loss": 1.3467, "step": 26689 }, { "epoch": 0.9558257381775207, "grad_norm": 1.5805599689483643, "learning_rate": 1.0209344506904694e-06, "loss": 1.4534, "step": 26690 }, { "epoch": 0.955861550307089, "grad_norm": 1.7332732677459717, "learning_rate": 1.019281923138715e-06, "loss": 1.4556, "step": 26691 }, { "epoch": 0.9558973624366573, "grad_norm": 1.5968459844589233, "learning_rate": 1.0176307272448448e-06, "loss": 1.3131, "step": 26692 }, { "epoch": 0.9559331745662256, "grad_norm": 1.7155627012252808, "learning_rate": 1.015980863031074e-06, "loss": 1.657, "step": 26693 }, { "epoch": 0.9559689866957939, "grad_norm": 1.7028433084487915, "learning_rate": 1.0143323305196184e-06, "loss": 1.4293, "step": 26694 }, { "epoch": 0.9560047988253622, "grad_norm": 1.3736257553100586, "learning_rate": 1.0126851297326157e-06, "loss": 1.1688, "step": 26695 }, { "epoch": 0.9560406109549304, "grad_norm": 1.3911411762237549, "learning_rate": 1.0110392606922703e-06, "loss": 1.3592, "step": 26696 }, { "epoch": 0.9560764230844987, "grad_norm": 1.8647515773773193, "learning_rate": 1.0093947234206868e-06, "loss": 1.5904, "step": 26697 }, { "epoch": 0.956112235214067, "grad_norm": 2.0917210578918457, "learning_rate": 1.0077515179400254e-06, "loss": 1.5137, "step": 26698 }, { "epoch": 0.9561480473436353, "grad_norm": 1.5692753791809082, "learning_rate": 1.0061096442723683e-06, "loss": 1.1976, "step": 26699 }, { "epoch": 0.9561838594732036, "grad_norm": 1.5351765155792236, "learning_rate": 1.004469102439809e-06, "loss": 1.2061, "step": 26700 }, { "epoch": 0.9562196716027719, "grad_norm": 1.3395408391952515, "learning_rate": 1.0028298924644408e-06, "loss": 1.1257, "step": 26701 }, { "epoch": 0.9562554837323402, "grad_norm": 1.8311903476715088, "learning_rate": 1.0011920143682796e-06, "loss": 1.3361, "step": 26702 }, { "epoch": 0.9562912958619084, "grad_norm": 1.7517163753509521, "learning_rate": 9.995554681733855e-07, "loss": 1.0912, "step": 26703 }, { "epoch": 0.9563271079914767, "grad_norm": 1.5621646642684937, "learning_rate": 9.97920253901774e-07, "loss": 1.7053, "step": 26704 }, { "epoch": 0.956362920121045, "grad_norm": 1.5236084461212158, "learning_rate": 9.96286371575439e-07, "loss": 1.6629, "step": 26705 }, { "epoch": 0.9563987322506133, "grad_norm": 2.2787911891937256, "learning_rate": 9.946538212163736e-07, "loss": 1.6588, "step": 26706 }, { "epoch": 0.9564345443801816, "grad_norm": 1.7489440441131592, "learning_rate": 9.930226028465272e-07, "loss": 1.5958, "step": 26707 }, { "epoch": 0.9564703565097499, "grad_norm": 2.500220775604248, "learning_rate": 9.913927164878488e-07, "loss": 1.5689, "step": 26708 }, { "epoch": 0.9565061686393181, "grad_norm": 1.724913239479065, "learning_rate": 9.897641621622765e-07, "loss": 1.1326, "step": 26709 }, { "epoch": 0.9565419807688864, "grad_norm": 2.303943634033203, "learning_rate": 9.88136939891704e-07, "loss": 1.4504, "step": 26710 }, { "epoch": 0.9565777928984547, "grad_norm": 2.162876844406128, "learning_rate": 9.865110496980356e-07, "loss": 1.5306, "step": 26711 }, { "epoch": 0.956613605028023, "grad_norm": 1.4706625938415527, "learning_rate": 9.84886491603154e-07, "loss": 1.3547, "step": 26712 }, { "epoch": 0.9566494171575913, "grad_norm": 1.7020697593688965, "learning_rate": 9.832632656288864e-07, "loss": 1.5613, "step": 26713 }, { "epoch": 0.9566852292871596, "grad_norm": 1.5223006010055542, "learning_rate": 9.81641371797104e-07, "loss": 1.5805, "step": 26714 }, { "epoch": 0.9567210414167279, "grad_norm": 1.1612199544906616, "learning_rate": 9.800208101296115e-07, "loss": 1.5998, "step": 26715 }, { "epoch": 0.9567568535462961, "grad_norm": 1.6327825784683228, "learning_rate": 9.784015806482028e-07, "loss": 1.1863, "step": 26716 }, { "epoch": 0.9567926656758644, "grad_norm": 1.9279059171676636, "learning_rate": 9.767836833746714e-07, "loss": 1.4357, "step": 26717 }, { "epoch": 0.9568284778054327, "grad_norm": 1.5725735425949097, "learning_rate": 9.751671183307888e-07, "loss": 1.4529, "step": 26718 }, { "epoch": 0.956864289935001, "grad_norm": 1.6223695278167725, "learning_rate": 9.735518855383152e-07, "loss": 1.4732, "step": 26719 }, { "epoch": 0.9569001020645693, "grad_norm": 2.1366162300109863, "learning_rate": 9.719379850189447e-07, "loss": 1.4565, "step": 26720 }, { "epoch": 0.9569359141941376, "grad_norm": 2.1586503982543945, "learning_rate": 9.703254167944154e-07, "loss": 1.2536, "step": 26721 }, { "epoch": 0.9569717263237059, "grad_norm": 2.3456790447235107, "learning_rate": 9.68714180886421e-07, "loss": 1.3688, "step": 26722 }, { "epoch": 0.9570075384532741, "grad_norm": 1.5606224536895752, "learning_rate": 9.67104277316644e-07, "loss": 1.6539, "step": 26723 }, { "epoch": 0.9570433505828424, "grad_norm": 1.9346030950546265, "learning_rate": 9.654957061067228e-07, "loss": 1.4414, "step": 26724 }, { "epoch": 0.9570791627124107, "grad_norm": 1.4974099397659302, "learning_rate": 9.638884672783176e-07, "loss": 1.3323, "step": 26725 }, { "epoch": 0.9571149748419789, "grad_norm": 1.7413378953933716, "learning_rate": 9.622825608530561e-07, "loss": 1.1622, "step": 26726 }, { "epoch": 0.9571507869715473, "grad_norm": 1.3514119386672974, "learning_rate": 9.606779868525206e-07, "loss": 1.0968, "step": 26727 }, { "epoch": 0.9571865991011156, "grad_norm": 1.4090226888656616, "learning_rate": 9.590747452983161e-07, "loss": 1.6259, "step": 26728 }, { "epoch": 0.9572224112306839, "grad_norm": 1.9629863500595093, "learning_rate": 9.574728362120033e-07, "loss": 1.0267, "step": 26729 }, { "epoch": 0.9572582233602521, "grad_norm": 1.4839998483657837, "learning_rate": 9.558722596151425e-07, "loss": 1.3511, "step": 26730 }, { "epoch": 0.9572940354898204, "grad_norm": 1.6294034719467163, "learning_rate": 9.5427301552925e-07, "loss": 1.4625, "step": 26731 }, { "epoch": 0.9573298476193887, "grad_norm": 1.5633814334869385, "learning_rate": 9.526751039758641e-07, "loss": 1.6409, "step": 26732 }, { "epoch": 0.9573656597489569, "grad_norm": 1.6685106754302979, "learning_rate": 9.510785249764786e-07, "loss": 1.4664, "step": 26733 }, { "epoch": 0.9574014718785253, "grad_norm": 1.2557251453399658, "learning_rate": 9.494832785525653e-07, "loss": 1.4488, "step": 26734 }, { "epoch": 0.9574372840080936, "grad_norm": 1.9086263179779053, "learning_rate": 9.478893647255849e-07, "loss": 1.2848, "step": 26735 }, { "epoch": 0.9574730961376618, "grad_norm": 1.5768110752105713, "learning_rate": 9.462967835169756e-07, "loss": 1.4434, "step": 26736 }, { "epoch": 0.9575089082672301, "grad_norm": 1.5403549671173096, "learning_rate": 9.44705534948187e-07, "loss": 1.4612, "step": 26737 }, { "epoch": 0.9575447203967984, "grad_norm": 1.6581268310546875, "learning_rate": 9.431156190406131e-07, "loss": 1.2959, "step": 26738 }, { "epoch": 0.9575805325263667, "grad_norm": 1.4390212297439575, "learning_rate": 9.41527035815637e-07, "loss": 1.2648, "step": 26739 }, { "epoch": 0.9576163446559349, "grad_norm": 1.7185239791870117, "learning_rate": 9.399397852946413e-07, "loss": 1.5569, "step": 26740 }, { "epoch": 0.9576521567855032, "grad_norm": 2.195068359375, "learning_rate": 9.383538674989756e-07, "loss": 1.7491, "step": 26741 }, { "epoch": 0.9576879689150716, "grad_norm": 1.4795840978622437, "learning_rate": 9.367692824499786e-07, "loss": 1.5406, "step": 26742 }, { "epoch": 0.9577237810446398, "grad_norm": 1.736876130104065, "learning_rate": 9.351860301689775e-07, "loss": 1.2074, "step": 26743 }, { "epoch": 0.9577595931742081, "grad_norm": 1.4114105701446533, "learning_rate": 9.336041106772553e-07, "loss": 1.3731, "step": 26744 }, { "epoch": 0.9577954053037764, "grad_norm": 1.434982180595398, "learning_rate": 9.320235239961061e-07, "loss": 1.257, "step": 26745 }, { "epoch": 0.9578312174333447, "grad_norm": 2.827622413635254, "learning_rate": 9.304442701467908e-07, "loss": 1.6111, "step": 26746 }, { "epoch": 0.9578670295629129, "grad_norm": 2.319575786590576, "learning_rate": 9.288663491505478e-07, "loss": 1.4424, "step": 26747 }, { "epoch": 0.9579028416924812, "grad_norm": 1.5229041576385498, "learning_rate": 9.27289761028638e-07, "loss": 1.2918, "step": 26748 }, { "epoch": 0.9579386538220496, "grad_norm": 1.6973974704742432, "learning_rate": 9.257145058022331e-07, "loss": 1.3539, "step": 26749 }, { "epoch": 0.9579744659516178, "grad_norm": 1.4195101261138916, "learning_rate": 9.241405834925388e-07, "loss": 1.3205, "step": 26750 }, { "epoch": 0.9580102780811861, "grad_norm": 1.6695009469985962, "learning_rate": 9.225679941207488e-07, "loss": 1.4648, "step": 26751 }, { "epoch": 0.9580460902107544, "grad_norm": 1.8575265407562256, "learning_rate": 9.20996737708002e-07, "loss": 1.4749, "step": 26752 }, { "epoch": 0.9580819023403226, "grad_norm": 1.3980778455734253, "learning_rate": 9.19426814275437e-07, "loss": 1.5525, "step": 26753 }, { "epoch": 0.9581177144698909, "grad_norm": 1.5089133977890015, "learning_rate": 9.178582238441702e-07, "loss": 1.1665, "step": 26754 }, { "epoch": 0.9581535265994592, "grad_norm": 1.390755534172058, "learning_rate": 9.162909664353292e-07, "loss": 1.6808, "step": 26755 }, { "epoch": 0.9581893387290276, "grad_norm": 1.4605809450149536, "learning_rate": 9.14725042069986e-07, "loss": 1.5083, "step": 26756 }, { "epoch": 0.9582251508585958, "grad_norm": 1.6914252042770386, "learning_rate": 9.131604507691904e-07, "loss": 1.635, "step": 26757 }, { "epoch": 0.9582609629881641, "grad_norm": 1.5005120038986206, "learning_rate": 9.115971925540257e-07, "loss": 1.3975, "step": 26758 }, { "epoch": 0.9582967751177324, "grad_norm": 1.4641746282577515, "learning_rate": 9.100352674454971e-07, "loss": 1.4, "step": 26759 }, { "epoch": 0.9583325872473006, "grad_norm": 1.9565989971160889, "learning_rate": 9.084746754646323e-07, "loss": 1.4052, "step": 26760 }, { "epoch": 0.9583683993768689, "grad_norm": 1.6255415678024292, "learning_rate": 9.069154166324146e-07, "loss": 1.3676, "step": 26761 }, { "epoch": 0.9584042115064372, "grad_norm": 2.551762104034424, "learning_rate": 9.053574909698381e-07, "loss": 1.4617, "step": 26762 }, { "epoch": 0.9584400236360056, "grad_norm": 1.3324257135391235, "learning_rate": 9.038008984978419e-07, "loss": 1.3645, "step": 26763 }, { "epoch": 0.9584758357655738, "grad_norm": 3.2408790588378906, "learning_rate": 9.022456392373868e-07, "loss": 1.3834, "step": 26764 }, { "epoch": 0.9585116478951421, "grad_norm": 1.4632230997085571, "learning_rate": 9.006917132093895e-07, "loss": 1.0799, "step": 26765 }, { "epoch": 0.9585474600247104, "grad_norm": 1.6773384809494019, "learning_rate": 8.991391204347555e-07, "loss": 1.4687, "step": 26766 }, { "epoch": 0.9585832721542786, "grad_norm": 1.8148605823516846, "learning_rate": 8.97587860934368e-07, "loss": 1.2124, "step": 26767 }, { "epoch": 0.9586190842838469, "grad_norm": 1.7556577920913696, "learning_rate": 8.960379347291103e-07, "loss": 1.6248, "step": 26768 }, { "epoch": 0.9586548964134152, "grad_norm": 2.0786924362182617, "learning_rate": 8.944893418398326e-07, "loss": 1.5726, "step": 26769 }, { "epoch": 0.9586907085429835, "grad_norm": 1.6416964530944824, "learning_rate": 8.929420822873513e-07, "loss": 1.4571, "step": 26770 }, { "epoch": 0.9587265206725518, "grad_norm": 1.4926072359085083, "learning_rate": 8.913961560925055e-07, "loss": 1.3523, "step": 26771 }, { "epoch": 0.9587623328021201, "grad_norm": 1.511873483657837, "learning_rate": 8.898515632760784e-07, "loss": 1.0218, "step": 26772 }, { "epoch": 0.9587981449316884, "grad_norm": 1.5736192464828491, "learning_rate": 8.883083038588536e-07, "loss": 1.7893, "step": 26773 }, { "epoch": 0.9588339570612566, "grad_norm": 1.9865953922271729, "learning_rate": 8.867663778616031e-07, "loss": 1.5569, "step": 26774 }, { "epoch": 0.9588697691908249, "grad_norm": 1.5352931022644043, "learning_rate": 8.852257853050661e-07, "loss": 1.7685, "step": 26775 }, { "epoch": 0.9589055813203932, "grad_norm": 2.0710299015045166, "learning_rate": 8.836865262099481e-07, "loss": 1.5833, "step": 26776 }, { "epoch": 0.9589413934499615, "grad_norm": 1.8323180675506592, "learning_rate": 8.821486005969992e-07, "loss": 1.1984, "step": 26777 }, { "epoch": 0.9589772055795298, "grad_norm": 1.1376245021820068, "learning_rate": 8.806120084868807e-07, "loss": 1.3374, "step": 26778 }, { "epoch": 0.9590130177090981, "grad_norm": 1.5147979259490967, "learning_rate": 8.79076749900265e-07, "loss": 1.4927, "step": 26779 }, { "epoch": 0.9590488298386664, "grad_norm": 1.8162474632263184, "learning_rate": 8.775428248578243e-07, "loss": 1.8344, "step": 26780 }, { "epoch": 0.9590846419682346, "grad_norm": 1.5417896509170532, "learning_rate": 8.760102333801756e-07, "loss": 1.3234, "step": 26781 }, { "epoch": 0.9591204540978029, "grad_norm": 1.8801335096359253, "learning_rate": 8.744789754879579e-07, "loss": 1.6405, "step": 26782 }, { "epoch": 0.9591562662273712, "grad_norm": 1.4927922487258911, "learning_rate": 8.729490512017547e-07, "loss": 1.3414, "step": 26783 }, { "epoch": 0.9591920783569395, "grad_norm": 1.7957075834274292, "learning_rate": 8.714204605421716e-07, "loss": 1.4618, "step": 26784 }, { "epoch": 0.9592278904865078, "grad_norm": 1.5633784532546997, "learning_rate": 8.69893203529748e-07, "loss": 1.208, "step": 26785 }, { "epoch": 0.9592637026160761, "grad_norm": 2.0784788131713867, "learning_rate": 8.683672801850451e-07, "loss": 1.613, "step": 26786 }, { "epoch": 0.9592995147456443, "grad_norm": 1.31596040725708, "learning_rate": 8.668426905285909e-07, "loss": 0.8767, "step": 26787 }, { "epoch": 0.9593353268752126, "grad_norm": 1.5244179964065552, "learning_rate": 8.653194345808913e-07, "loss": 1.4995, "step": 26788 }, { "epoch": 0.9593711390047809, "grad_norm": 1.6521975994110107, "learning_rate": 8.63797512362452e-07, "loss": 1.2428, "step": 26789 }, { "epoch": 0.9594069511343492, "grad_norm": 1.871695637702942, "learning_rate": 8.622769238937345e-07, "loss": 1.2734, "step": 26790 }, { "epoch": 0.9594427632639175, "grad_norm": 1.8507094383239746, "learning_rate": 8.607576691952002e-07, "loss": 1.5353, "step": 26791 }, { "epoch": 0.9594785753934858, "grad_norm": 1.6126267910003662, "learning_rate": 8.592397482872993e-07, "loss": 1.3408, "step": 26792 }, { "epoch": 0.9595143875230541, "grad_norm": 1.529656171798706, "learning_rate": 8.577231611904379e-07, "loss": 1.6628, "step": 26793 }, { "epoch": 0.9595501996526223, "grad_norm": 1.551055908203125, "learning_rate": 8.562079079250219e-07, "loss": 1.3079, "step": 26794 }, { "epoch": 0.9595860117821906, "grad_norm": 1.8348814249038696, "learning_rate": 8.546939885114569e-07, "loss": 1.394, "step": 26795 }, { "epoch": 0.9596218239117589, "grad_norm": 1.4194611310958862, "learning_rate": 8.531814029700935e-07, "loss": 1.3471, "step": 26796 }, { "epoch": 0.9596576360413271, "grad_norm": 1.5185151100158691, "learning_rate": 8.516701513212821e-07, "loss": 1.5099, "step": 26797 }, { "epoch": 0.9596934481708955, "grad_norm": 1.589985728263855, "learning_rate": 8.501602335853509e-07, "loss": 1.2476, "step": 26798 }, { "epoch": 0.9597292603004638, "grad_norm": 2.1401360034942627, "learning_rate": 8.48651649782628e-07, "loss": 1.6254, "step": 26799 }, { "epoch": 0.9597650724300321, "grad_norm": 1.7402527332305908, "learning_rate": 8.471443999333972e-07, "loss": 1.423, "step": 26800 }, { "epoch": 0.9598008845596003, "grad_norm": 1.600137710571289, "learning_rate": 8.456384840579423e-07, "loss": 1.5019, "step": 26801 }, { "epoch": 0.9598366966891686, "grad_norm": 1.6320627927780151, "learning_rate": 8.441339021765138e-07, "loss": 1.3925, "step": 26802 }, { "epoch": 0.9598725088187369, "grad_norm": 2.637521266937256, "learning_rate": 8.426306543093732e-07, "loss": 1.6728, "step": 26803 }, { "epoch": 0.9599083209483051, "grad_norm": 1.9787664413452148, "learning_rate": 8.411287404767265e-07, "loss": 1.6501, "step": 26804 }, { "epoch": 0.9599441330778735, "grad_norm": 1.341884970664978, "learning_rate": 8.396281606987799e-07, "loss": 1.4126, "step": 26805 }, { "epoch": 0.9599799452074418, "grad_norm": 1.7115846872329712, "learning_rate": 8.381289149957395e-07, "loss": 1.5408, "step": 26806 }, { "epoch": 0.9600157573370101, "grad_norm": 1.4531255960464478, "learning_rate": 8.366310033877667e-07, "loss": 1.589, "step": 26807 }, { "epoch": 0.9600515694665783, "grad_norm": 2.3292057514190674, "learning_rate": 8.351344258950123e-07, "loss": 1.8701, "step": 26808 }, { "epoch": 0.9600873815961466, "grad_norm": 1.7623049020767212, "learning_rate": 8.336391825376044e-07, "loss": 1.3654, "step": 26809 }, { "epoch": 0.9601231937257149, "grad_norm": 1.5028655529022217, "learning_rate": 8.321452733356605e-07, "loss": 1.4138, "step": 26810 }, { "epoch": 0.9601590058552831, "grad_norm": 1.9508914947509766, "learning_rate": 8.306526983092977e-07, "loss": 1.3763, "step": 26811 }, { "epoch": 0.9601948179848515, "grad_norm": 1.7204222679138184, "learning_rate": 8.291614574785777e-07, "loss": 1.3373, "step": 26812 }, { "epoch": 0.9602306301144198, "grad_norm": 1.9033406972885132, "learning_rate": 8.276715508635624e-07, "loss": 1.3409, "step": 26813 }, { "epoch": 0.960266442243988, "grad_norm": 1.5977619886398315, "learning_rate": 8.261829784843133e-07, "loss": 1.2249, "step": 26814 }, { "epoch": 0.9603022543735563, "grad_norm": 1.9996955394744873, "learning_rate": 8.246957403608479e-07, "loss": 1.5399, "step": 26815 }, { "epoch": 0.9603380665031246, "grad_norm": 1.686150312423706, "learning_rate": 8.232098365131613e-07, "loss": 1.2805, "step": 26816 }, { "epoch": 0.9603738786326929, "grad_norm": 1.4930355548858643, "learning_rate": 8.217252669612708e-07, "loss": 1.4063, "step": 26817 }, { "epoch": 0.9604096907622611, "grad_norm": 1.8459813594818115, "learning_rate": 8.20242031725138e-07, "loss": 1.5286, "step": 26818 }, { "epoch": 0.9604455028918295, "grad_norm": 1.9831894636154175, "learning_rate": 8.187601308247028e-07, "loss": 1.4814, "step": 26819 }, { "epoch": 0.9604813150213978, "grad_norm": 1.6175930500030518, "learning_rate": 8.172795642799269e-07, "loss": 1.24, "step": 26820 }, { "epoch": 0.960517127150966, "grad_norm": 1.5015861988067627, "learning_rate": 8.158003321107167e-07, "loss": 1.2342, "step": 26821 }, { "epoch": 0.9605529392805343, "grad_norm": 1.2254098653793335, "learning_rate": 8.143224343369671e-07, "loss": 1.3663, "step": 26822 }, { "epoch": 0.9605887514101026, "grad_norm": 1.7844117879867554, "learning_rate": 8.128458709785736e-07, "loss": 1.5133, "step": 26823 }, { "epoch": 0.9606245635396709, "grad_norm": 1.9229958057403564, "learning_rate": 8.113706420553868e-07, "loss": 1.7314, "step": 26824 }, { "epoch": 0.9606603756692391, "grad_norm": 1.4775229692459106, "learning_rate": 8.098967475872798e-07, "loss": 1.4693, "step": 26825 }, { "epoch": 0.9606961877988075, "grad_norm": 1.6402254104614258, "learning_rate": 8.084241875940591e-07, "loss": 1.3664, "step": 26826 }, { "epoch": 0.9607319999283758, "grad_norm": 1.7152255773544312, "learning_rate": 8.069529620955418e-07, "loss": 1.3495, "step": 26827 }, { "epoch": 0.960767812057944, "grad_norm": 1.5333672761917114, "learning_rate": 8.054830711115236e-07, "loss": 1.533, "step": 26828 }, { "epoch": 0.9608036241875123, "grad_norm": 1.5828067064285278, "learning_rate": 8.040145146617883e-07, "loss": 1.7531, "step": 26829 }, { "epoch": 0.9608394363170806, "grad_norm": 1.8916761875152588, "learning_rate": 8.025472927660649e-07, "loss": 1.0839, "step": 26830 }, { "epoch": 0.9608752484466488, "grad_norm": 1.5456327199935913, "learning_rate": 8.010814054441262e-07, "loss": 1.5393, "step": 26831 }, { "epoch": 0.9609110605762171, "grad_norm": 1.50007164478302, "learning_rate": 7.996168527156789e-07, "loss": 1.049, "step": 26832 }, { "epoch": 0.9609468727057855, "grad_norm": 1.552908182144165, "learning_rate": 7.981536346004292e-07, "loss": 1.4244, "step": 26833 }, { "epoch": 0.9609826848353538, "grad_norm": 1.4380766153335571, "learning_rate": 7.966917511180505e-07, "loss": 1.1892, "step": 26834 }, { "epoch": 0.961018496964922, "grad_norm": 2.1194546222686768, "learning_rate": 7.952312022882269e-07, "loss": 1.5398, "step": 26835 }, { "epoch": 0.9610543090944903, "grad_norm": 1.35401451587677, "learning_rate": 7.937719881306094e-07, "loss": 1.566, "step": 26836 }, { "epoch": 0.9610901212240586, "grad_norm": 2.0986156463623047, "learning_rate": 7.923141086648156e-07, "loss": 1.3092, "step": 26837 }, { "epoch": 0.9611259333536268, "grad_norm": 1.8315749168395996, "learning_rate": 7.908575639104631e-07, "loss": 1.5848, "step": 26838 }, { "epoch": 0.9611617454831951, "grad_norm": 1.6822844743728638, "learning_rate": 7.894023538871587e-07, "loss": 1.5722, "step": 26839 }, { "epoch": 0.9611975576127635, "grad_norm": 1.2145322561264038, "learning_rate": 7.879484786144753e-07, "loss": 1.2355, "step": 26840 }, { "epoch": 0.9612333697423318, "grad_norm": 1.5983326435089111, "learning_rate": 7.864959381119641e-07, "loss": 1.6364, "step": 26841 }, { "epoch": 0.9612691818719, "grad_norm": 1.8771876096725464, "learning_rate": 7.85044732399165e-07, "loss": 1.4767, "step": 26842 }, { "epoch": 0.9613049940014683, "grad_norm": 2.0207412242889404, "learning_rate": 7.83594861495629e-07, "loss": 1.4324, "step": 26843 }, { "epoch": 0.9613408061310366, "grad_norm": 1.887495517730713, "learning_rate": 7.821463254208405e-07, "loss": 1.4604, "step": 26844 }, { "epoch": 0.9613766182606048, "grad_norm": 1.4683293104171753, "learning_rate": 7.80699124194284e-07, "loss": 1.5779, "step": 26845 }, { "epoch": 0.9614124303901731, "grad_norm": 1.590532660484314, "learning_rate": 7.792532578354439e-07, "loss": 1.4709, "step": 26846 }, { "epoch": 0.9614482425197415, "grad_norm": 1.5941145420074463, "learning_rate": 7.778087263637601e-07, "loss": 1.5715, "step": 26847 }, { "epoch": 0.9614840546493097, "grad_norm": 2.0739402770996094, "learning_rate": 7.763655297986839e-07, "loss": 1.6764, "step": 26848 }, { "epoch": 0.961519866778878, "grad_norm": 1.8480732440948486, "learning_rate": 7.749236681595995e-07, "loss": 1.6399, "step": 26849 }, { "epoch": 0.9615556789084463, "grad_norm": 1.648714542388916, "learning_rate": 7.734831414659471e-07, "loss": 1.5086, "step": 26850 }, { "epoch": 0.9615914910380146, "grad_norm": 1.796379804611206, "learning_rate": 7.720439497370668e-07, "loss": 1.3986, "step": 26851 }, { "epoch": 0.9616273031675828, "grad_norm": 1.7396949529647827, "learning_rate": 7.706060929923542e-07, "loss": 1.1852, "step": 26852 }, { "epoch": 0.9616631152971511, "grad_norm": 1.374097228050232, "learning_rate": 7.691695712511382e-07, "loss": 1.4309, "step": 26853 }, { "epoch": 0.9616989274267195, "grad_norm": 1.4233494997024536, "learning_rate": 7.677343845327478e-07, "loss": 1.1654, "step": 26854 }, { "epoch": 0.9617347395562877, "grad_norm": 2.057396173477173, "learning_rate": 7.663005328564787e-07, "loss": 1.6129, "step": 26855 }, { "epoch": 0.961770551685856, "grad_norm": 2.265347480773926, "learning_rate": 7.648680162416489e-07, "loss": 1.3166, "step": 26856 }, { "epoch": 0.9618063638154243, "grad_norm": 1.4549821615219116, "learning_rate": 7.634368347075093e-07, "loss": 1.2215, "step": 26857 }, { "epoch": 0.9618421759449926, "grad_norm": 1.7157262563705444, "learning_rate": 7.620069882733227e-07, "loss": 1.5253, "step": 26858 }, { "epoch": 0.9618779880745608, "grad_norm": 2.227339506149292, "learning_rate": 7.605784769583291e-07, "loss": 1.5059, "step": 26859 }, { "epoch": 0.9619138002041291, "grad_norm": 1.5298230648040771, "learning_rate": 7.591513007817242e-07, "loss": 1.3268, "step": 26860 }, { "epoch": 0.9619496123336975, "grad_norm": 1.7910478115081787, "learning_rate": 7.577254597627481e-07, "loss": 1.3596, "step": 26861 }, { "epoch": 0.9619854244632657, "grad_norm": 1.3827205896377563, "learning_rate": 7.563009539205524e-07, "loss": 1.4843, "step": 26862 }, { "epoch": 0.962021236592834, "grad_norm": 1.5558743476867676, "learning_rate": 7.548777832743214e-07, "loss": 1.2898, "step": 26863 }, { "epoch": 0.9620570487224023, "grad_norm": 2.3799028396606445, "learning_rate": 7.534559478431735e-07, "loss": 1.2981, "step": 26864 }, { "epoch": 0.9620928608519705, "grad_norm": 1.3155205249786377, "learning_rate": 7.52035447646271e-07, "loss": 1.3632, "step": 26865 }, { "epoch": 0.9621286729815388, "grad_norm": 1.3187897205352783, "learning_rate": 7.506162827027097e-07, "loss": 1.3126, "step": 26866 }, { "epoch": 0.9621644851111071, "grad_norm": 1.7988380193710327, "learning_rate": 7.491984530315854e-07, "loss": 1.6965, "step": 26867 }, { "epoch": 0.9622002972406755, "grad_norm": 1.1581627130508423, "learning_rate": 7.477819586519719e-07, "loss": 1.1106, "step": 26868 }, { "epoch": 0.9622361093702437, "grad_norm": 1.507202386856079, "learning_rate": 7.463667995829205e-07, "loss": 1.4695, "step": 26869 }, { "epoch": 0.962271921499812, "grad_norm": 2.0441157817840576, "learning_rate": 7.449529758434826e-07, "loss": 2.2557, "step": 26870 }, { "epoch": 0.9623077336293803, "grad_norm": 1.7305978536605835, "learning_rate": 7.435404874526542e-07, "loss": 1.238, "step": 26871 }, { "epoch": 0.9623435457589485, "grad_norm": 1.8210771083831787, "learning_rate": 7.421293344294755e-07, "loss": 1.5418, "step": 26872 }, { "epoch": 0.9623793578885168, "grad_norm": 1.6746025085449219, "learning_rate": 7.407195167929093e-07, "loss": 1.2805, "step": 26873 }, { "epoch": 0.9624151700180851, "grad_norm": 1.6006801128387451, "learning_rate": 7.393110345619291e-07, "loss": 1.269, "step": 26874 }, { "epoch": 0.9624509821476535, "grad_norm": 2.0808916091918945, "learning_rate": 7.379038877554755e-07, "loss": 1.2495, "step": 26875 }, { "epoch": 0.9624867942772217, "grad_norm": 1.9244236946105957, "learning_rate": 7.364980763924889e-07, "loss": 1.3046, "step": 26876 }, { "epoch": 0.96252260640679, "grad_norm": 2.2387166023254395, "learning_rate": 7.350936004918873e-07, "loss": 1.4992, "step": 26877 }, { "epoch": 0.9625584185363583, "grad_norm": 1.9266955852508545, "learning_rate": 7.336904600725447e-07, "loss": 1.306, "step": 26878 }, { "epoch": 0.9625942306659265, "grad_norm": 1.6920135021209717, "learning_rate": 7.322886551533681e-07, "loss": 1.6659, "step": 26879 }, { "epoch": 0.9626300427954948, "grad_norm": 1.9262080192565918, "learning_rate": 7.308881857531869e-07, "loss": 1.4505, "step": 26880 }, { "epoch": 0.9626658549250631, "grad_norm": 1.438104271888733, "learning_rate": 7.294890518908748e-07, "loss": 1.3511, "step": 26881 }, { "epoch": 0.9627016670546314, "grad_norm": 1.2074106931686401, "learning_rate": 7.280912535852169e-07, "loss": 1.4729, "step": 26882 }, { "epoch": 0.9627374791841997, "grad_norm": 1.594930648803711, "learning_rate": 7.266947908550536e-07, "loss": 1.2189, "step": 26883 }, { "epoch": 0.962773291313768, "grad_norm": 1.5803894996643066, "learning_rate": 7.252996637191589e-07, "loss": 1.0278, "step": 26884 }, { "epoch": 0.9628091034433363, "grad_norm": 1.9029607772827148, "learning_rate": 7.239058721962954e-07, "loss": 1.3136, "step": 26885 }, { "epoch": 0.9628449155729045, "grad_norm": 2.055635452270508, "learning_rate": 7.22513416305226e-07, "loss": 1.294, "step": 26886 }, { "epoch": 0.9628807277024728, "grad_norm": 1.8848527669906616, "learning_rate": 7.211222960646691e-07, "loss": 1.4366, "step": 26887 }, { "epoch": 0.9629165398320411, "grad_norm": 1.2585186958312988, "learning_rate": 7.197325114933651e-07, "loss": 1.1339, "step": 26888 }, { "epoch": 0.9629523519616094, "grad_norm": 1.8972463607788086, "learning_rate": 7.18344062609988e-07, "loss": 1.5986, "step": 26889 }, { "epoch": 0.9629881640911777, "grad_norm": 2.0489518642425537, "learning_rate": 7.16956949433234e-07, "loss": 1.325, "step": 26890 }, { "epoch": 0.963023976220746, "grad_norm": 1.7141057252883911, "learning_rate": 7.155711719817548e-07, "loss": 1.3843, "step": 26891 }, { "epoch": 0.9630597883503142, "grad_norm": 1.6005711555480957, "learning_rate": 7.141867302742023e-07, "loss": 1.265, "step": 26892 }, { "epoch": 0.9630956004798825, "grad_norm": 1.6023467779159546, "learning_rate": 7.128036243291947e-07, "loss": 1.2884, "step": 26893 }, { "epoch": 0.9631314126094508, "grad_norm": 1.2686173915863037, "learning_rate": 7.114218541653395e-07, "loss": 1.1523, "step": 26894 }, { "epoch": 0.9631672247390191, "grad_norm": 1.4661974906921387, "learning_rate": 7.100414198012439e-07, "loss": 1.2727, "step": 26895 }, { "epoch": 0.9632030368685874, "grad_norm": 1.7659021615982056, "learning_rate": 7.086623212554488e-07, "loss": 1.5836, "step": 26896 }, { "epoch": 0.9632388489981557, "grad_norm": 1.6870405673980713, "learning_rate": 7.072845585465282e-07, "loss": 1.3479, "step": 26897 }, { "epoch": 0.963274661127724, "grad_norm": 1.9497534036636353, "learning_rate": 7.059081316930227e-07, "loss": 1.334, "step": 26898 }, { "epoch": 0.9633104732572922, "grad_norm": 1.5201635360717773, "learning_rate": 7.045330407134398e-07, "loss": 1.5913, "step": 26899 }, { "epoch": 0.9633462853868605, "grad_norm": 1.6073867082595825, "learning_rate": 7.03159285626287e-07, "loss": 1.531, "step": 26900 }, { "epoch": 0.9633820975164288, "grad_norm": 2.0254600048065186, "learning_rate": 7.017868664500382e-07, "loss": 1.5583, "step": 26901 }, { "epoch": 0.963417909645997, "grad_norm": 2.1424214839935303, "learning_rate": 7.004157832031677e-07, "loss": 1.5673, "step": 26902 }, { "epoch": 0.9634537217755654, "grad_norm": 1.7584158182144165, "learning_rate": 6.990460359041051e-07, "loss": 1.17, "step": 26903 }, { "epoch": 0.9634895339051337, "grad_norm": 1.9335455894470215, "learning_rate": 6.976776245712913e-07, "loss": 1.464, "step": 26904 }, { "epoch": 0.963525346034702, "grad_norm": 1.4011025428771973, "learning_rate": 6.963105492231336e-07, "loss": 1.5505, "step": 26905 }, { "epoch": 0.9635611581642702, "grad_norm": 1.5605167150497437, "learning_rate": 6.949448098780398e-07, "loss": 1.2234, "step": 26906 }, { "epoch": 0.9635969702938385, "grad_norm": 1.5061067342758179, "learning_rate": 6.935804065543505e-07, "loss": 1.1857, "step": 26907 }, { "epoch": 0.9636327824234068, "grad_norm": 1.4534320831298828, "learning_rate": 6.922173392704512e-07, "loss": 1.625, "step": 26908 }, { "epoch": 0.963668594552975, "grad_norm": 1.4374990463256836, "learning_rate": 6.908556080446715e-07, "loss": 1.3349, "step": 26909 }, { "epoch": 0.9637044066825434, "grad_norm": 1.656003475189209, "learning_rate": 6.894952128953191e-07, "loss": 1.3548, "step": 26910 }, { "epoch": 0.9637402188121117, "grad_norm": 1.9220056533813477, "learning_rate": 6.881361538407127e-07, "loss": 1.3714, "step": 26911 }, { "epoch": 0.96377603094168, "grad_norm": 1.139539361000061, "learning_rate": 6.867784308991266e-07, "loss": 1.3443, "step": 26912 }, { "epoch": 0.9638118430712482, "grad_norm": 1.2987382411956787, "learning_rate": 6.854220440888459e-07, "loss": 1.4791, "step": 26913 }, { "epoch": 0.9638476552008165, "grad_norm": 1.2175135612487793, "learning_rate": 6.840669934280897e-07, "loss": 1.3745, "step": 26914 }, { "epoch": 0.9638834673303848, "grad_norm": 1.5563571453094482, "learning_rate": 6.827132789351098e-07, "loss": 1.6409, "step": 26915 }, { "epoch": 0.963919279459953, "grad_norm": 1.1882492303848267, "learning_rate": 6.813609006281141e-07, "loss": 1.41, "step": 26916 }, { "epoch": 0.9639550915895214, "grad_norm": 1.3806182146072388, "learning_rate": 6.800098585252989e-07, "loss": 1.3614, "step": 26917 }, { "epoch": 0.9639909037190897, "grad_norm": 1.8224741220474243, "learning_rate": 6.786601526448277e-07, "loss": 1.3087, "step": 26918 }, { "epoch": 0.964026715848658, "grad_norm": 1.6787079572677612, "learning_rate": 6.773117830048747e-07, "loss": 1.5826, "step": 26919 }, { "epoch": 0.9640625279782262, "grad_norm": 2.0062367916107178, "learning_rate": 6.75964749623581e-07, "loss": 1.6547, "step": 26920 }, { "epoch": 0.9640983401077945, "grad_norm": 1.6232973337173462, "learning_rate": 6.746190525190543e-07, "loss": 1.529, "step": 26921 }, { "epoch": 0.9641341522373628, "grad_norm": 1.5797841548919678, "learning_rate": 6.732746917094135e-07, "loss": 1.5343, "step": 26922 }, { "epoch": 0.964169964366931, "grad_norm": 1.5757808685302734, "learning_rate": 6.719316672127329e-07, "loss": 1.5117, "step": 26923 }, { "epoch": 0.9642057764964994, "grad_norm": 1.458806037902832, "learning_rate": 6.70589979047087e-07, "loss": 1.3089, "step": 26924 }, { "epoch": 0.9642415886260677, "grad_norm": 1.3439996242523193, "learning_rate": 6.692496272305282e-07, "loss": 1.4445, "step": 26925 }, { "epoch": 0.964277400755636, "grad_norm": 1.373887538909912, "learning_rate": 6.679106117810974e-07, "loss": 1.4416, "step": 26926 }, { "epoch": 0.9643132128852042, "grad_norm": 1.9046086072921753, "learning_rate": 6.665729327167913e-07, "loss": 1.7455, "step": 26927 }, { "epoch": 0.9643490250147725, "grad_norm": 1.4031591415405273, "learning_rate": 6.652365900556179e-07, "loss": 1.5515, "step": 26928 }, { "epoch": 0.9643848371443408, "grad_norm": 1.617029070854187, "learning_rate": 6.639015838155515e-07, "loss": 1.713, "step": 26929 }, { "epoch": 0.964420649273909, "grad_norm": 1.4018628597259521, "learning_rate": 6.625679140145557e-07, "loss": 0.8941, "step": 26930 }, { "epoch": 0.9644564614034774, "grad_norm": 1.6945066452026367, "learning_rate": 6.612355806705828e-07, "loss": 1.3377, "step": 26931 }, { "epoch": 0.9644922735330457, "grad_norm": 1.6313774585723877, "learning_rate": 6.599045838015294e-07, "loss": 1.0908, "step": 26932 }, { "epoch": 0.9645280856626139, "grad_norm": 1.6346851587295532, "learning_rate": 6.585749234253258e-07, "loss": 1.3156, "step": 26933 }, { "epoch": 0.9645638977921822, "grad_norm": 2.169135570526123, "learning_rate": 6.572465995598575e-07, "loss": 1.5391, "step": 26934 }, { "epoch": 0.9645997099217505, "grad_norm": 1.4126479625701904, "learning_rate": 6.559196122229994e-07, "loss": 1.5252, "step": 26935 }, { "epoch": 0.9646355220513188, "grad_norm": 2.0156967639923096, "learning_rate": 6.545939614325924e-07, "loss": 1.5874, "step": 26936 }, { "epoch": 0.964671334180887, "grad_norm": 1.959639310836792, "learning_rate": 6.532696472064781e-07, "loss": 1.4451, "step": 26937 }, { "epoch": 0.9647071463104554, "grad_norm": 1.6670535802841187, "learning_rate": 6.519466695624755e-07, "loss": 1.7676, "step": 26938 }, { "epoch": 0.9647429584400237, "grad_norm": 1.3304088115692139, "learning_rate": 6.506250285183812e-07, "loss": 1.3935, "step": 26939 }, { "epoch": 0.9647787705695919, "grad_norm": 1.6989744901657104, "learning_rate": 6.493047240919703e-07, "loss": 1.3568, "step": 26940 }, { "epoch": 0.9648145826991602, "grad_norm": 1.3269007205963135, "learning_rate": 6.479857563010062e-07, "loss": 1.2906, "step": 26941 }, { "epoch": 0.9648503948287285, "grad_norm": 1.288854956626892, "learning_rate": 6.466681251632522e-07, "loss": 1.3047, "step": 26942 }, { "epoch": 0.9648862069582967, "grad_norm": 1.3249443769454956, "learning_rate": 6.453518306964168e-07, "loss": 1.4381, "step": 26943 }, { "epoch": 0.964922019087865, "grad_norm": 1.3801206350326538, "learning_rate": 6.440368729182078e-07, "loss": 1.4704, "step": 26944 }, { "epoch": 0.9649578312174334, "grad_norm": 1.7521178722381592, "learning_rate": 6.427232518463333e-07, "loss": 1.3048, "step": 26945 }, { "epoch": 0.9649936433470017, "grad_norm": 1.3953019380569458, "learning_rate": 6.414109674984458e-07, "loss": 1.4156, "step": 26946 }, { "epoch": 0.9650294554765699, "grad_norm": 1.5526511669158936, "learning_rate": 6.401000198922202e-07, "loss": 1.5402, "step": 26947 }, { "epoch": 0.9650652676061382, "grad_norm": 1.5012935400009155, "learning_rate": 6.387904090452757e-07, "loss": 1.5017, "step": 26948 }, { "epoch": 0.9651010797357065, "grad_norm": 1.2707685232162476, "learning_rate": 6.374821349752424e-07, "loss": 1.1305, "step": 26949 }, { "epoch": 0.9651368918652747, "grad_norm": 1.609204649925232, "learning_rate": 6.361751976997177e-07, "loss": 1.5067, "step": 26950 }, { "epoch": 0.965172703994843, "grad_norm": 1.312595009803772, "learning_rate": 6.348695972362872e-07, "loss": 1.4279, "step": 26951 }, { "epoch": 0.9652085161244114, "grad_norm": 1.449548602104187, "learning_rate": 6.33565333602515e-07, "loss": 1.6088, "step": 26952 }, { "epoch": 0.9652443282539797, "grad_norm": 1.653937816619873, "learning_rate": 6.322624068159421e-07, "loss": 1.2204, "step": 26953 }, { "epoch": 0.9652801403835479, "grad_norm": 2.3628926277160645, "learning_rate": 6.309608168941217e-07, "loss": 1.6656, "step": 26954 }, { "epoch": 0.9653159525131162, "grad_norm": 1.80828857421875, "learning_rate": 6.296605638545172e-07, "loss": 1.3403, "step": 26955 }, { "epoch": 0.9653517646426845, "grad_norm": 1.901632308959961, "learning_rate": 6.283616477146703e-07, "loss": 1.1962, "step": 26956 }, { "epoch": 0.9653875767722527, "grad_norm": 1.6647350788116455, "learning_rate": 6.270640684920337e-07, "loss": 1.7878, "step": 26957 }, { "epoch": 0.965423388901821, "grad_norm": 1.296832799911499, "learning_rate": 6.257678262040712e-07, "loss": 1.3981, "step": 26958 }, { "epoch": 0.9654592010313894, "grad_norm": 1.8085285425186157, "learning_rate": 6.244729208682131e-07, "loss": 1.1554, "step": 26959 }, { "epoch": 0.9654950131609576, "grad_norm": 1.1620607376098633, "learning_rate": 6.231793525018903e-07, "loss": 1.0353, "step": 26960 }, { "epoch": 0.9655308252905259, "grad_norm": 1.9427391290664673, "learning_rate": 6.218871211224997e-07, "loss": 1.4748, "step": 26961 }, { "epoch": 0.9655666374200942, "grad_norm": 1.4789810180664062, "learning_rate": 6.205962267474386e-07, "loss": 1.1432, "step": 26962 }, { "epoch": 0.9656024495496625, "grad_norm": 1.6948950290679932, "learning_rate": 6.193066693940597e-07, "loss": 1.1085, "step": 26963 }, { "epoch": 0.9656382616792307, "grad_norm": 1.3911237716674805, "learning_rate": 6.180184490797158e-07, "loss": 1.3466, "step": 26964 }, { "epoch": 0.965674073808799, "grad_norm": 1.7155574560165405, "learning_rate": 6.167315658217376e-07, "loss": 1.575, "step": 26965 }, { "epoch": 0.9657098859383674, "grad_norm": 2.408755302429199, "learning_rate": 6.154460196374445e-07, "loss": 1.6934, "step": 26966 }, { "epoch": 0.9657456980679356, "grad_norm": 1.5642789602279663, "learning_rate": 6.141618105441227e-07, "loss": 1.4295, "step": 26967 }, { "epoch": 0.9657815101975039, "grad_norm": 1.8017868995666504, "learning_rate": 6.128789385590583e-07, "loss": 1.1765, "step": 26968 }, { "epoch": 0.9658173223270722, "grad_norm": 2.101372241973877, "learning_rate": 6.115974036995154e-07, "loss": 1.272, "step": 26969 }, { "epoch": 0.9658531344566404, "grad_norm": 1.5753008127212524, "learning_rate": 6.103172059827134e-07, "loss": 1.1554, "step": 26970 }, { "epoch": 0.9658889465862087, "grad_norm": 1.654207468032837, "learning_rate": 6.090383454259052e-07, "loss": 1.7734, "step": 26971 }, { "epoch": 0.965924758715777, "grad_norm": 2.283719539642334, "learning_rate": 6.077608220462771e-07, "loss": 0.987, "step": 26972 }, { "epoch": 0.9659605708453454, "grad_norm": 1.7849880456924438, "learning_rate": 6.064846358610154e-07, "loss": 1.5324, "step": 26973 }, { "epoch": 0.9659963829749136, "grad_norm": 2.171529769897461, "learning_rate": 6.052097868872953e-07, "loss": 1.4802, "step": 26974 }, { "epoch": 0.9660321951044819, "grad_norm": 2.1995201110839844, "learning_rate": 6.039362751422695e-07, "loss": 1.3182, "step": 26975 }, { "epoch": 0.9660680072340502, "grad_norm": 1.4685053825378418, "learning_rate": 6.026641006430689e-07, "loss": 1.4164, "step": 26976 }, { "epoch": 0.9661038193636184, "grad_norm": 1.7886309623718262, "learning_rate": 6.013932634068021e-07, "loss": 1.6115, "step": 26977 }, { "epoch": 0.9661396314931867, "grad_norm": 1.714840054512024, "learning_rate": 6.001237634505885e-07, "loss": 1.8374, "step": 26978 }, { "epoch": 0.966175443622755, "grad_norm": 1.6897437572479248, "learning_rate": 5.988556007914814e-07, "loss": 1.9828, "step": 26979 }, { "epoch": 0.9662112557523234, "grad_norm": 1.6048061847686768, "learning_rate": 5.975887754465559e-07, "loss": 1.5878, "step": 26980 }, { "epoch": 0.9662470678818916, "grad_norm": 1.753430724143982, "learning_rate": 5.96323287432854e-07, "loss": 1.4545, "step": 26981 }, { "epoch": 0.9662828800114599, "grad_norm": 1.6798697710037231, "learning_rate": 5.950591367674064e-07, "loss": 1.2394, "step": 26982 }, { "epoch": 0.9663186921410282, "grad_norm": 2.274351119995117, "learning_rate": 5.937963234672106e-07, "loss": 1.3658, "step": 26983 }, { "epoch": 0.9663545042705964, "grad_norm": 1.8976236581802368, "learning_rate": 5.925348475492643e-07, "loss": 1.0295, "step": 26984 }, { "epoch": 0.9663903164001647, "grad_norm": 1.966462254524231, "learning_rate": 5.912747090305315e-07, "loss": 1.659, "step": 26985 }, { "epoch": 0.966426128529733, "grad_norm": 1.4895193576812744, "learning_rate": 5.900159079279654e-07, "loss": 1.377, "step": 26986 }, { "epoch": 0.9664619406593014, "grad_norm": 1.6803325414657593, "learning_rate": 5.887584442585081e-07, "loss": 1.2961, "step": 26987 }, { "epoch": 0.9664977527888696, "grad_norm": 1.4784119129180908, "learning_rate": 5.875023180390793e-07, "loss": 1.6773, "step": 26988 }, { "epoch": 0.9665335649184379, "grad_norm": 1.7596663236618042, "learning_rate": 5.862475292865655e-07, "loss": 1.3413, "step": 26989 }, { "epoch": 0.9665693770480062, "grad_norm": 2.026500940322876, "learning_rate": 5.849940780178642e-07, "loss": 1.1128, "step": 26990 }, { "epoch": 0.9666051891775744, "grad_norm": 1.9572885036468506, "learning_rate": 5.837419642498288e-07, "loss": 1.7062, "step": 26991 }, { "epoch": 0.9666410013071427, "grad_norm": 1.2944083213806152, "learning_rate": 5.824911879993123e-07, "loss": 1.2943, "step": 26992 }, { "epoch": 0.966676813436711, "grad_norm": 2.0377883911132812, "learning_rate": 5.812417492831346e-07, "loss": 1.4307, "step": 26993 }, { "epoch": 0.9667126255662793, "grad_norm": 1.5536547899246216, "learning_rate": 5.799936481181045e-07, "loss": 1.448, "step": 26994 }, { "epoch": 0.9667484376958476, "grad_norm": 2.0070462226867676, "learning_rate": 5.787468845210198e-07, "loss": 1.2658, "step": 26995 }, { "epoch": 0.9667842498254159, "grad_norm": 1.606011986732483, "learning_rate": 5.775014585086446e-07, "loss": 1.7443, "step": 26996 }, { "epoch": 0.9668200619549842, "grad_norm": 2.032684326171875, "learning_rate": 5.762573700977547e-07, "loss": 1.5145, "step": 26997 }, { "epoch": 0.9668558740845524, "grad_norm": 1.5862538814544678, "learning_rate": 5.750146193050698e-07, "loss": 1.3515, "step": 26998 }, { "epoch": 0.9668916862141207, "grad_norm": 1.840734839439392, "learning_rate": 5.7377320614731e-07, "loss": 1.6445, "step": 26999 }, { "epoch": 0.966927498343689, "grad_norm": 2.2183516025543213, "learning_rate": 5.725331306411841e-07, "loss": 1.6061, "step": 27000 }, { "epoch": 0.9669633104732573, "grad_norm": 1.8486427068710327, "learning_rate": 5.712943928033787e-07, "loss": 1.2393, "step": 27001 }, { "epoch": 0.9669991226028256, "grad_norm": 1.6504371166229248, "learning_rate": 5.700569926505361e-07, "loss": 1.1869, "step": 27002 }, { "epoch": 0.9670349347323939, "grad_norm": 1.9643383026123047, "learning_rate": 5.688209301993319e-07, "loss": 1.5403, "step": 27003 }, { "epoch": 0.9670707468619621, "grad_norm": 1.8288096189498901, "learning_rate": 5.675862054663861e-07, "loss": 1.4967, "step": 27004 }, { "epoch": 0.9671065589915304, "grad_norm": 2.0646116733551025, "learning_rate": 5.663528184683186e-07, "loss": 1.5351, "step": 27005 }, { "epoch": 0.9671423711210987, "grad_norm": 2.1061975955963135, "learning_rate": 5.651207692216942e-07, "loss": 1.564, "step": 27006 }, { "epoch": 0.967178183250667, "grad_norm": 1.4611119031906128, "learning_rate": 5.638900577431216e-07, "loss": 1.3468, "step": 27007 }, { "epoch": 0.9672139953802353, "grad_norm": 1.4998737573623657, "learning_rate": 5.626606840491433e-07, "loss": 1.6063, "step": 27008 }, { "epoch": 0.9672498075098036, "grad_norm": 1.3977317810058594, "learning_rate": 5.614326481562904e-07, "loss": 1.4851, "step": 27009 }, { "epoch": 0.9672856196393719, "grad_norm": 1.8437548875808716, "learning_rate": 5.602059500811052e-07, "loss": 1.705, "step": 27010 }, { "epoch": 0.9673214317689401, "grad_norm": 1.445892572402954, "learning_rate": 5.589805898400746e-07, "loss": 1.3189, "step": 27011 }, { "epoch": 0.9673572438985084, "grad_norm": 1.510178804397583, "learning_rate": 5.577565674496965e-07, "loss": 1.3705, "step": 27012 }, { "epoch": 0.9673930560280767, "grad_norm": 1.8084083795547485, "learning_rate": 5.565338829264355e-07, "loss": 1.4164, "step": 27013 }, { "epoch": 0.967428868157645, "grad_norm": 1.2820135354995728, "learning_rate": 5.553125362867228e-07, "loss": 1.2558, "step": 27014 }, { "epoch": 0.9674646802872133, "grad_norm": 1.898093581199646, "learning_rate": 5.540925275470232e-07, "loss": 1.796, "step": 27015 }, { "epoch": 0.9675004924167816, "grad_norm": 1.603390097618103, "learning_rate": 5.528738567237235e-07, "loss": 1.697, "step": 27016 }, { "epoch": 0.9675363045463499, "grad_norm": 1.477070927619934, "learning_rate": 5.516565238332328e-07, "loss": 1.2186, "step": 27017 }, { "epoch": 0.9675721166759181, "grad_norm": 1.664985179901123, "learning_rate": 5.504405288919156e-07, "loss": 1.1862, "step": 27018 }, { "epoch": 0.9676079288054864, "grad_norm": 1.6416716575622559, "learning_rate": 5.492258719161481e-07, "loss": 1.4381, "step": 27019 }, { "epoch": 0.9676437409350547, "grad_norm": 1.1065994501113892, "learning_rate": 5.480125529222613e-07, "loss": 1.0456, "step": 27020 }, { "epoch": 0.9676795530646229, "grad_norm": 1.6238538026809692, "learning_rate": 5.468005719265868e-07, "loss": 1.6424, "step": 27021 }, { "epoch": 0.9677153651941913, "grad_norm": 1.3962935209274292, "learning_rate": 5.455899289454225e-07, "loss": 1.6138, "step": 27022 }, { "epoch": 0.9677511773237596, "grad_norm": 1.931966781616211, "learning_rate": 5.443806239950555e-07, "loss": 1.2321, "step": 27023 }, { "epoch": 0.9677869894533279, "grad_norm": 1.9834296703338623, "learning_rate": 5.431726570917617e-07, "loss": 1.26, "step": 27024 }, { "epoch": 0.9678228015828961, "grad_norm": 1.65738844871521, "learning_rate": 5.419660282517836e-07, "loss": 1.153, "step": 27025 }, { "epoch": 0.9678586137124644, "grad_norm": 1.481104850769043, "learning_rate": 5.407607374913748e-07, "loss": 1.4851, "step": 27026 }, { "epoch": 0.9678944258420327, "grad_norm": 1.8398284912109375, "learning_rate": 5.395567848267225e-07, "loss": 1.6348, "step": 27027 }, { "epoch": 0.9679302379716009, "grad_norm": 1.63754141330719, "learning_rate": 5.383541702740469e-07, "loss": 1.5278, "step": 27028 }, { "epoch": 0.9679660501011693, "grad_norm": 1.5657932758331299, "learning_rate": 5.37152893849513e-07, "loss": 1.4017, "step": 27029 }, { "epoch": 0.9680018622307376, "grad_norm": 1.9750888347625732, "learning_rate": 5.359529555692966e-07, "loss": 1.5823, "step": 27030 }, { "epoch": 0.9680376743603059, "grad_norm": 1.7874226570129395, "learning_rate": 5.347543554495293e-07, "loss": 1.4156, "step": 27031 }, { "epoch": 0.9680734864898741, "grad_norm": 2.025090456008911, "learning_rate": 5.335570935063427e-07, "loss": 1.6055, "step": 27032 }, { "epoch": 0.9681092986194424, "grad_norm": 1.950339913368225, "learning_rate": 5.323611697558462e-07, "loss": 1.2355, "step": 27033 }, { "epoch": 0.9681451107490107, "grad_norm": 1.6537014245986938, "learning_rate": 5.311665842141155e-07, "loss": 1.3523, "step": 27034 }, { "epoch": 0.9681809228785789, "grad_norm": 1.566712498664856, "learning_rate": 5.299733368972492e-07, "loss": 1.5859, "step": 27035 }, { "epoch": 0.9682167350081473, "grad_norm": 2.611342668533325, "learning_rate": 5.287814278212677e-07, "loss": 1.6179, "step": 27036 }, { "epoch": 0.9682525471377156, "grad_norm": 1.9200516939163208, "learning_rate": 5.275908570022359e-07, "loss": 1.6782, "step": 27037 }, { "epoch": 0.9682883592672838, "grad_norm": 1.4160640239715576, "learning_rate": 5.26401624456152e-07, "loss": 1.287, "step": 27038 }, { "epoch": 0.9683241713968521, "grad_norm": 1.3941360712051392, "learning_rate": 5.252137301990256e-07, "loss": 1.3371, "step": 27039 }, { "epoch": 0.9683599835264204, "grad_norm": 3.1313459873199463, "learning_rate": 5.240271742468328e-07, "loss": 1.3915, "step": 27040 }, { "epoch": 0.9683957956559887, "grad_norm": 2.499171018600464, "learning_rate": 5.228419566155385e-07, "loss": 1.3878, "step": 27041 }, { "epoch": 0.9684316077855569, "grad_norm": 1.5517430305480957, "learning_rate": 5.216580773210966e-07, "loss": 1.3956, "step": 27042 }, { "epoch": 0.9684674199151253, "grad_norm": 1.5848878622055054, "learning_rate": 5.204755363794167e-07, "loss": 1.3835, "step": 27043 }, { "epoch": 0.9685032320446936, "grad_norm": 1.4164799451828003, "learning_rate": 5.192943338064305e-07, "loss": 1.5956, "step": 27044 }, { "epoch": 0.9685390441742618, "grad_norm": 1.7449172735214233, "learning_rate": 5.18114469618014e-07, "loss": 1.7308, "step": 27045 }, { "epoch": 0.9685748563038301, "grad_norm": 1.5101077556610107, "learning_rate": 5.169359438300436e-07, "loss": 1.1332, "step": 27046 }, { "epoch": 0.9686106684333984, "grad_norm": 1.4586726427078247, "learning_rate": 5.157587564583733e-07, "loss": 1.5053, "step": 27047 }, { "epoch": 0.9686464805629666, "grad_norm": 1.5485001802444458, "learning_rate": 5.145829075188457e-07, "loss": 1.1668, "step": 27048 }, { "epoch": 0.9686822926925349, "grad_norm": 1.5960198640823364, "learning_rate": 5.134083970272819e-07, "loss": 1.6091, "step": 27049 }, { "epoch": 0.9687181048221033, "grad_norm": 1.9195680618286133, "learning_rate": 5.1223522499948e-07, "loss": 1.3194, "step": 27050 }, { "epoch": 0.9687539169516716, "grad_norm": 1.7722783088684082, "learning_rate": 5.110633914512164e-07, "loss": 1.5024, "step": 27051 }, { "epoch": 0.9687897290812398, "grad_norm": 1.3769675493240356, "learning_rate": 5.098928963982674e-07, "loss": 1.5481, "step": 27052 }, { "epoch": 0.9688255412108081, "grad_norm": 1.214339256286621, "learning_rate": 5.08723739856376e-07, "loss": 1.1588, "step": 27053 }, { "epoch": 0.9688613533403764, "grad_norm": 1.6598423719406128, "learning_rate": 5.07555921841274e-07, "loss": 1.4385, "step": 27054 }, { "epoch": 0.9688971654699446, "grad_norm": 1.1218611001968384, "learning_rate": 5.06389442368671e-07, "loss": 1.3616, "step": 27055 }, { "epoch": 0.9689329775995129, "grad_norm": 1.3781205415725708, "learning_rate": 5.052243014542546e-07, "loss": 1.5814, "step": 27056 }, { "epoch": 0.9689687897290813, "grad_norm": 1.7898406982421875, "learning_rate": 5.040604991137121e-07, "loss": 1.1487, "step": 27057 }, { "epoch": 0.9690046018586496, "grad_norm": 1.6360304355621338, "learning_rate": 5.028980353626866e-07, "loss": 1.4937, "step": 27058 }, { "epoch": 0.9690404139882178, "grad_norm": 1.2866710424423218, "learning_rate": 5.017369102168435e-07, "loss": 1.2457, "step": 27059 }, { "epoch": 0.9690762261177861, "grad_norm": 1.3128433227539062, "learning_rate": 5.005771236917811e-07, "loss": 1.4053, "step": 27060 }, { "epoch": 0.9691120382473544, "grad_norm": 1.6207622289657593, "learning_rate": 4.994186758030983e-07, "loss": 1.5096, "step": 27061 }, { "epoch": 0.9691478503769226, "grad_norm": 1.527236819267273, "learning_rate": 4.982615665663937e-07, "loss": 1.4559, "step": 27062 }, { "epoch": 0.9691836625064909, "grad_norm": 1.528676152229309, "learning_rate": 4.971057959972325e-07, "loss": 1.1819, "step": 27063 }, { "epoch": 0.9692194746360593, "grad_norm": 1.549570083618164, "learning_rate": 4.95951364111169e-07, "loss": 1.344, "step": 27064 }, { "epoch": 0.9692552867656276, "grad_norm": 2.239349603652954, "learning_rate": 4.947982709237131e-07, "loss": 1.349, "step": 27065 }, { "epoch": 0.9692910988951958, "grad_norm": 1.8288278579711914, "learning_rate": 4.936465164504079e-07, "loss": 1.8213, "step": 27066 }, { "epoch": 0.9693269110247641, "grad_norm": 1.5280758142471313, "learning_rate": 4.924961007067408e-07, "loss": 1.7326, "step": 27067 }, { "epoch": 0.9693627231543324, "grad_norm": 1.4166117906570435, "learning_rate": 4.913470237081774e-07, "loss": 1.5967, "step": 27068 }, { "epoch": 0.9693985352839006, "grad_norm": 1.780877947807312, "learning_rate": 4.90199285470172e-07, "loss": 1.6305, "step": 27069 }, { "epoch": 0.9694343474134689, "grad_norm": 1.342950463294983, "learning_rate": 4.8905288600819e-07, "loss": 1.2657, "step": 27070 }, { "epoch": 0.9694701595430373, "grad_norm": 1.7711330652236938, "learning_rate": 4.879078253376412e-07, "loss": 1.4819, "step": 27071 }, { "epoch": 0.9695059716726055, "grad_norm": 1.4958521127700806, "learning_rate": 4.867641034739134e-07, "loss": 1.4194, "step": 27072 }, { "epoch": 0.9695417838021738, "grad_norm": 1.9198914766311646, "learning_rate": 4.856217204324275e-07, "loss": 1.6569, "step": 27073 }, { "epoch": 0.9695775959317421, "grad_norm": 1.47645902633667, "learning_rate": 4.844806762285381e-07, "loss": 1.2356, "step": 27074 }, { "epoch": 0.9696134080613104, "grad_norm": 1.583561658859253, "learning_rate": 4.833409708775882e-07, "loss": 1.4539, "step": 27075 }, { "epoch": 0.9696492201908786, "grad_norm": 1.3329954147338867, "learning_rate": 4.822026043949213e-07, "loss": 1.4844, "step": 27076 }, { "epoch": 0.9696850323204469, "grad_norm": 1.3243921995162964, "learning_rate": 4.810655767958583e-07, "loss": 1.1535, "step": 27077 }, { "epoch": 0.9697208444500153, "grad_norm": 1.422972559928894, "learning_rate": 4.799298880956759e-07, "loss": 1.4926, "step": 27078 }, { "epoch": 0.9697566565795835, "grad_norm": 1.622788667678833, "learning_rate": 4.787955383096731e-07, "loss": 1.1752, "step": 27079 }, { "epoch": 0.9697924687091518, "grad_norm": 1.558379888534546, "learning_rate": 4.776625274530933e-07, "loss": 1.1288, "step": 27080 }, { "epoch": 0.9698282808387201, "grad_norm": 1.6241610050201416, "learning_rate": 4.765308555411907e-07, "loss": 1.1744, "step": 27081 }, { "epoch": 0.9698640929682883, "grad_norm": 1.4816008806228638, "learning_rate": 4.754005225891978e-07, "loss": 1.344, "step": 27082 }, { "epoch": 0.9698999050978566, "grad_norm": 1.7448887825012207, "learning_rate": 4.742715286123134e-07, "loss": 1.3445, "step": 27083 }, { "epoch": 0.9699357172274249, "grad_norm": 1.9753953218460083, "learning_rate": 4.7314387362572545e-07, "loss": 1.3754, "step": 27084 }, { "epoch": 0.9699715293569933, "grad_norm": 1.8347922563552856, "learning_rate": 4.7201755764459953e-07, "loss": 1.4986, "step": 27085 }, { "epoch": 0.9700073414865615, "grad_norm": 1.6232060194015503, "learning_rate": 4.7089258068410133e-07, "loss": 1.4232, "step": 27086 }, { "epoch": 0.9700431536161298, "grad_norm": 2.0615720748901367, "learning_rate": 4.6976894275935215e-07, "loss": 1.9353, "step": 27087 }, { "epoch": 0.9700789657456981, "grad_norm": 1.7156115770339966, "learning_rate": 4.6864664388548417e-07, "loss": 1.4939, "step": 27088 }, { "epoch": 0.9701147778752663, "grad_norm": 1.5696758031845093, "learning_rate": 4.6752568407759655e-07, "loss": 1.2644, "step": 27089 }, { "epoch": 0.9701505900048346, "grad_norm": 1.4447704553604126, "learning_rate": 4.66406063350755e-07, "loss": 1.4013, "step": 27090 }, { "epoch": 0.9701864021344029, "grad_norm": 1.5259318351745605, "learning_rate": 4.652877817200252e-07, "loss": 1.5891, "step": 27091 }, { "epoch": 0.9702222142639713, "grad_norm": 1.3538051843643188, "learning_rate": 4.6417083920046176e-07, "loss": 1.4276, "step": 27092 }, { "epoch": 0.9702580263935395, "grad_norm": 1.2154916524887085, "learning_rate": 4.630552358070972e-07, "loss": 1.2274, "step": 27093 }, { "epoch": 0.9702938385231078, "grad_norm": 1.5083569288253784, "learning_rate": 4.6194097155491944e-07, "loss": 1.7358, "step": 27094 }, { "epoch": 0.9703296506526761, "grad_norm": 1.5696214437484741, "learning_rate": 4.6082804645893874e-07, "loss": 1.4356, "step": 27095 }, { "epoch": 0.9703654627822443, "grad_norm": 1.5140817165374756, "learning_rate": 4.597164605341209e-07, "loss": 1.3379, "step": 27096 }, { "epoch": 0.9704012749118126, "grad_norm": 1.9681334495544434, "learning_rate": 4.5860621379540944e-07, "loss": 1.3589, "step": 27097 }, { "epoch": 0.9704370870413809, "grad_norm": 1.6079471111297607, "learning_rate": 4.574973062577592e-07, "loss": 1.2374, "step": 27098 }, { "epoch": 0.9704728991709493, "grad_norm": 1.9617375135421753, "learning_rate": 4.5638973793608043e-07, "loss": 1.429, "step": 27099 }, { "epoch": 0.9705087113005175, "grad_norm": 1.4626727104187012, "learning_rate": 4.5528350884528335e-07, "loss": 1.5835, "step": 27100 }, { "epoch": 0.9705445234300858, "grad_norm": 1.6523990631103516, "learning_rate": 4.5417861900023397e-07, "loss": 1.3469, "step": 27101 }, { "epoch": 0.9705803355596541, "grad_norm": 1.3021047115325928, "learning_rate": 4.5307506841580914e-07, "loss": 1.2336, "step": 27102 }, { "epoch": 0.9706161476892223, "grad_norm": 1.75713050365448, "learning_rate": 4.5197285710685265e-07, "loss": 1.8113, "step": 27103 }, { "epoch": 0.9706519598187906, "grad_norm": 1.3806419372558594, "learning_rate": 4.5087198508819705e-07, "loss": 1.3937, "step": 27104 }, { "epoch": 0.9706877719483589, "grad_norm": 1.4847077131271362, "learning_rate": 4.497724523746416e-07, "loss": 1.3979, "step": 27105 }, { "epoch": 0.9707235840779272, "grad_norm": 1.5948593616485596, "learning_rate": 4.486742589809967e-07, "loss": 1.5002, "step": 27106 }, { "epoch": 0.9707593962074955, "grad_norm": 1.5786889791488647, "learning_rate": 4.4757740492201717e-07, "loss": 1.5142, "step": 27107 }, { "epoch": 0.9707952083370638, "grad_norm": 2.1769702434539795, "learning_rate": 4.464818902124801e-07, "loss": 1.2433, "step": 27108 }, { "epoch": 0.970831020466632, "grad_norm": 1.6344908475875854, "learning_rate": 4.4538771486710706e-07, "loss": 1.8311, "step": 27109 }, { "epoch": 0.9708668325962003, "grad_norm": 2.086514472961426, "learning_rate": 4.442948789006307e-07, "loss": 1.4124, "step": 27110 }, { "epoch": 0.9709026447257686, "grad_norm": 2.050281286239624, "learning_rate": 4.432033823277504e-07, "loss": 1.2815, "step": 27111 }, { "epoch": 0.9709384568553369, "grad_norm": 2.2314565181732178, "learning_rate": 4.4211322516314324e-07, "loss": 1.7093, "step": 27112 }, { "epoch": 0.9709742689849052, "grad_norm": 1.5358484983444214, "learning_rate": 4.410244074214864e-07, "loss": 1.4834, "step": 27113 }, { "epoch": 0.9710100811144735, "grad_norm": 1.5115602016448975, "learning_rate": 4.399369291174349e-07, "loss": 1.2563, "step": 27114 }, { "epoch": 0.9710458932440418, "grad_norm": 2.1405441761016846, "learning_rate": 4.388507902655881e-07, "loss": 1.3515, "step": 27115 }, { "epoch": 0.97108170537361, "grad_norm": 1.8947888612747192, "learning_rate": 4.377659908805898e-07, "loss": 1.5495, "step": 27116 }, { "epoch": 0.9711175175031783, "grad_norm": 1.3705615997314453, "learning_rate": 4.366825309770284e-07, "loss": 1.0637, "step": 27117 }, { "epoch": 0.9711533296327466, "grad_norm": 2.133995532989502, "learning_rate": 4.3560041056947e-07, "loss": 1.4022, "step": 27118 }, { "epoch": 0.9711891417623149, "grad_norm": 2.3694558143615723, "learning_rate": 4.345196296724807e-07, "loss": 1.4344, "step": 27119 }, { "epoch": 0.9712249538918832, "grad_norm": 1.50997793674469, "learning_rate": 4.334401883005934e-07, "loss": 1.4774, "step": 27120 }, { "epoch": 0.9712607660214515, "grad_norm": 2.032027244567871, "learning_rate": 4.32362086468352e-07, "loss": 1.5578, "step": 27121 }, { "epoch": 0.9712965781510198, "grad_norm": 1.652375340461731, "learning_rate": 4.312853241902337e-07, "loss": 1.4457, "step": 27122 }, { "epoch": 0.971332390280588, "grad_norm": 1.819873332977295, "learning_rate": 4.3020990148073815e-07, "loss": 1.6141, "step": 27123 }, { "epoch": 0.9713682024101563, "grad_norm": 1.587869644165039, "learning_rate": 4.2913581835433147e-07, "loss": 1.6089, "step": 27124 }, { "epoch": 0.9714040145397246, "grad_norm": 1.382434368133545, "learning_rate": 4.2806307482546883e-07, "loss": 1.5386, "step": 27125 }, { "epoch": 0.9714398266692928, "grad_norm": 1.4083807468414307, "learning_rate": 4.2699167090858303e-07, "loss": 1.2496, "step": 27126 }, { "epoch": 0.9714756387988612, "grad_norm": 1.9262927770614624, "learning_rate": 4.259216066180738e-07, "loss": 1.4704, "step": 27127 }, { "epoch": 0.9715114509284295, "grad_norm": 1.735276460647583, "learning_rate": 4.248528819683517e-07, "loss": 1.3874, "step": 27128 }, { "epoch": 0.9715472630579978, "grad_norm": 1.3945090770721436, "learning_rate": 4.2378549697380533e-07, "loss": 1.4935, "step": 27129 }, { "epoch": 0.971583075187566, "grad_norm": 1.7358595132827759, "learning_rate": 4.2271945164876756e-07, "loss": 1.4826, "step": 27130 }, { "epoch": 0.9716188873171343, "grad_norm": 1.6000685691833496, "learning_rate": 4.216547460075937e-07, "loss": 1.4768, "step": 27131 }, { "epoch": 0.9716546994467026, "grad_norm": 1.8158823251724243, "learning_rate": 4.2059138006460554e-07, "loss": 1.4642, "step": 27132 }, { "epoch": 0.9716905115762708, "grad_norm": 1.5935328006744385, "learning_rate": 4.1952935383412494e-07, "loss": 1.3549, "step": 27133 }, { "epoch": 0.9717263237058391, "grad_norm": 1.1625258922576904, "learning_rate": 4.1846866733041834e-07, "loss": 1.3621, "step": 27134 }, { "epoch": 0.9717621358354075, "grad_norm": 1.8553330898284912, "learning_rate": 4.1740932056776317e-07, "loss": 1.5953, "step": 27135 }, { "epoch": 0.9717979479649758, "grad_norm": 1.6651084423065186, "learning_rate": 4.1635131356041467e-07, "loss": 1.6272, "step": 27136 }, { "epoch": 0.971833760094544, "grad_norm": 1.5465548038482666, "learning_rate": 4.1529464632260597e-07, "loss": 1.3804, "step": 27137 }, { "epoch": 0.9718695722241123, "grad_norm": 1.316346526145935, "learning_rate": 4.1423931886854785e-07, "loss": 1.5989, "step": 27138 }, { "epoch": 0.9719053843536806, "grad_norm": 1.3082847595214844, "learning_rate": 4.131853312124512e-07, "loss": 1.5898, "step": 27139 }, { "epoch": 0.9719411964832488, "grad_norm": 1.3995599746704102, "learning_rate": 4.1213268336849355e-07, "loss": 1.4302, "step": 27140 }, { "epoch": 0.9719770086128171, "grad_norm": 1.8354229927062988, "learning_rate": 4.1108137535081914e-07, "loss": 1.3678, "step": 27141 }, { "epoch": 0.9720128207423855, "grad_norm": 1.4459917545318604, "learning_rate": 4.1003140717358336e-07, "loss": 1.4699, "step": 27142 }, { "epoch": 0.9720486328719538, "grad_norm": 1.557350993156433, "learning_rate": 4.089827788509304e-07, "loss": 1.2917, "step": 27143 }, { "epoch": 0.972084445001522, "grad_norm": 1.7156989574432373, "learning_rate": 4.079354903969379e-07, "loss": 1.4024, "step": 27144 }, { "epoch": 0.9721202571310903, "grad_norm": 2.2056097984313965, "learning_rate": 4.068895418257057e-07, "loss": 1.2329, "step": 27145 }, { "epoch": 0.9721560692606586, "grad_norm": 1.616719126701355, "learning_rate": 4.0584493315131146e-07, "loss": 1.4484, "step": 27146 }, { "epoch": 0.9721918813902268, "grad_norm": 1.549475908279419, "learning_rate": 4.048016643878105e-07, "loss": 1.6468, "step": 27147 }, { "epoch": 0.9722276935197951, "grad_norm": 1.5446029901504517, "learning_rate": 4.037597355492362e-07, "loss": 1.5179, "step": 27148 }, { "epoch": 0.9722635056493635, "grad_norm": 1.9477657079696655, "learning_rate": 4.0271914664959944e-07, "loss": 1.5566, "step": 27149 }, { "epoch": 0.9722993177789317, "grad_norm": 1.5786957740783691, "learning_rate": 4.016798977029113e-07, "loss": 1.2298, "step": 27150 }, { "epoch": 0.9723351299085, "grad_norm": 1.673638939857483, "learning_rate": 4.006419887231383e-07, "loss": 1.5531, "step": 27151 }, { "epoch": 0.9723709420380683, "grad_norm": 1.7010632753372192, "learning_rate": 3.9960541972426936e-07, "loss": 1.5984, "step": 27152 }, { "epoch": 0.9724067541676366, "grad_norm": 1.7504063844680786, "learning_rate": 3.985701907202155e-07, "loss": 1.3279, "step": 27153 }, { "epoch": 0.9724425662972048, "grad_norm": 1.5936665534973145, "learning_rate": 3.975363017249323e-07, "loss": 1.2734, "step": 27154 }, { "epoch": 0.9724783784267731, "grad_norm": 1.544960856437683, "learning_rate": 3.9650375275231967e-07, "loss": 1.4678, "step": 27155 }, { "epoch": 0.9725141905563415, "grad_norm": 1.6201308965682983, "learning_rate": 3.9547254381626653e-07, "loss": 1.3237, "step": 27156 }, { "epoch": 0.9725500026859097, "grad_norm": 1.5041776895523071, "learning_rate": 3.944426749306507e-07, "loss": 1.4167, "step": 27157 }, { "epoch": 0.972585814815478, "grad_norm": 1.9468683004379272, "learning_rate": 3.934141461093277e-07, "loss": 1.2188, "step": 27158 }, { "epoch": 0.9726216269450463, "grad_norm": 1.457405686378479, "learning_rate": 3.9238695736614207e-07, "loss": 1.3617, "step": 27159 }, { "epoch": 0.9726574390746145, "grad_norm": 1.630591630935669, "learning_rate": 3.913611087148938e-07, "loss": 1.438, "step": 27160 }, { "epoch": 0.9726932512041828, "grad_norm": 1.6744720935821533, "learning_rate": 3.903366001694053e-07, "loss": 1.4143, "step": 27161 }, { "epoch": 0.9727290633337511, "grad_norm": 1.620065689086914, "learning_rate": 3.8931343174344324e-07, "loss": 1.2351, "step": 27162 }, { "epoch": 0.9727648754633195, "grad_norm": 1.7324904203414917, "learning_rate": 3.882916034507855e-07, "loss": 1.212, "step": 27163 }, { "epoch": 0.9728006875928877, "grad_norm": 1.5588440895080566, "learning_rate": 3.8727111530516556e-07, "loss": 1.4532, "step": 27164 }, { "epoch": 0.972836499722456, "grad_norm": 1.5138558149337769, "learning_rate": 3.86251967320328e-07, "loss": 1.4652, "step": 27165 }, { "epoch": 0.9728723118520243, "grad_norm": 1.6585930585861206, "learning_rate": 3.85234159509984e-07, "loss": 1.5071, "step": 27166 }, { "epoch": 0.9729081239815925, "grad_norm": 1.572977900505066, "learning_rate": 3.842176918878115e-07, "loss": 1.5052, "step": 27167 }, { "epoch": 0.9729439361111608, "grad_norm": 1.2999725341796875, "learning_rate": 3.832025644674886e-07, "loss": 1.2422, "step": 27168 }, { "epoch": 0.9729797482407291, "grad_norm": 2.5449912548065186, "learning_rate": 3.821887772626931e-07, "loss": 1.3325, "step": 27169 }, { "epoch": 0.9730155603702975, "grad_norm": 1.7839765548706055, "learning_rate": 3.8117633028704745e-07, "loss": 1.0184, "step": 27170 }, { "epoch": 0.9730513724998657, "grad_norm": 1.3288191556930542, "learning_rate": 3.801652235541631e-07, "loss": 1.4608, "step": 27171 }, { "epoch": 0.973087184629434, "grad_norm": 1.5566009283065796, "learning_rate": 3.791554570776734e-07, "loss": 1.4342, "step": 27172 }, { "epoch": 0.9731229967590023, "grad_norm": 1.7924531698226929, "learning_rate": 3.781470308711343e-07, "loss": 1.2537, "step": 27173 }, { "epoch": 0.9731588088885705, "grad_norm": 1.666527271270752, "learning_rate": 3.771399449481239e-07, "loss": 1.0901, "step": 27174 }, { "epoch": 0.9731946210181388, "grad_norm": 1.79993736743927, "learning_rate": 3.761341993221867e-07, "loss": 1.2884, "step": 27175 }, { "epoch": 0.9732304331477071, "grad_norm": 1.9205312728881836, "learning_rate": 3.7512979400686763e-07, "loss": 1.389, "step": 27176 }, { "epoch": 0.9732662452772755, "grad_norm": 1.901877999305725, "learning_rate": 3.741267290156669e-07, "loss": 1.6964, "step": 27177 }, { "epoch": 0.9733020574068437, "grad_norm": 1.5668498277664185, "learning_rate": 3.7312500436208487e-07, "loss": 1.1559, "step": 27178 }, { "epoch": 0.973337869536412, "grad_norm": 1.4472907781600952, "learning_rate": 3.7212462005959957e-07, "loss": 1.306, "step": 27179 }, { "epoch": 0.9733736816659803, "grad_norm": 1.4478232860565186, "learning_rate": 3.7112557612165586e-07, "loss": 1.3558, "step": 27180 }, { "epoch": 0.9734094937955485, "grad_norm": 1.8058240413665771, "learning_rate": 3.7012787256172075e-07, "loss": 1.0638, "step": 27181 }, { "epoch": 0.9734453059251168, "grad_norm": 1.9508631229400635, "learning_rate": 3.6913150939318353e-07, "loss": 1.5124, "step": 27182 }, { "epoch": 0.9734811180546851, "grad_norm": 1.470751404762268, "learning_rate": 3.6813648662947785e-07, "loss": 1.427, "step": 27183 }, { "epoch": 0.9735169301842534, "grad_norm": 1.1770071983337402, "learning_rate": 3.67142804283982e-07, "loss": 1.2635, "step": 27184 }, { "epoch": 0.9735527423138217, "grad_norm": 1.5016038417816162, "learning_rate": 3.661504623700629e-07, "loss": 1.2942, "step": 27185 }, { "epoch": 0.97358855444339, "grad_norm": 1.4237912893295288, "learning_rate": 3.6515946090106557e-07, "loss": 1.7626, "step": 27186 }, { "epoch": 0.9736243665729583, "grad_norm": 1.517599105834961, "learning_rate": 3.641697998903237e-07, "loss": 1.4293, "step": 27187 }, { "epoch": 0.9736601787025265, "grad_norm": 1.4845151901245117, "learning_rate": 3.631814793511712e-07, "loss": 1.1693, "step": 27188 }, { "epoch": 0.9736959908320948, "grad_norm": 1.8167911767959595, "learning_rate": 3.621944992968751e-07, "loss": 1.401, "step": 27189 }, { "epoch": 0.9737318029616631, "grad_norm": 1.7373158931732178, "learning_rate": 3.612088597407359e-07, "loss": 1.4933, "step": 27190 }, { "epoch": 0.9737676150912314, "grad_norm": 1.7631866931915283, "learning_rate": 3.6022456069600973e-07, "loss": 1.3376, "step": 27191 }, { "epoch": 0.9738034272207997, "grad_norm": 1.3551874160766602, "learning_rate": 3.592416021759304e-07, "loss": 1.0676, "step": 27192 }, { "epoch": 0.973839239350368, "grad_norm": 1.2745553255081177, "learning_rate": 3.5825998419372065e-07, "loss": 1.3036, "step": 27193 }, { "epoch": 0.9738750514799362, "grad_norm": 1.5684278011322021, "learning_rate": 3.5727970676260327e-07, "loss": 1.1635, "step": 27194 }, { "epoch": 0.9739108636095045, "grad_norm": 1.5761404037475586, "learning_rate": 3.563007698957566e-07, "loss": 1.2785, "step": 27195 }, { "epoch": 0.9739466757390728, "grad_norm": 1.679574728012085, "learning_rate": 3.553231736063589e-07, "loss": 1.723, "step": 27196 }, { "epoch": 0.9739824878686411, "grad_norm": 1.4595870971679688, "learning_rate": 3.5434691790754427e-07, "loss": 1.6485, "step": 27197 }, { "epoch": 0.9740182999982094, "grad_norm": 1.6029869318008423, "learning_rate": 3.5337200281245765e-07, "loss": 1.4157, "step": 27198 }, { "epoch": 0.9740541121277777, "grad_norm": 1.6887636184692383, "learning_rate": 3.52398428334233e-07, "loss": 1.6353, "step": 27199 }, { "epoch": 0.974089924257346, "grad_norm": 1.629212737083435, "learning_rate": 3.514261944859376e-07, "loss": 1.4481, "step": 27200 }, { "epoch": 0.9741257363869142, "grad_norm": 1.597827672958374, "learning_rate": 3.5045530128066106e-07, "loss": 1.5889, "step": 27201 }, { "epoch": 0.9741615485164825, "grad_norm": 1.69936203956604, "learning_rate": 3.4948574873148174e-07, "loss": 1.5639, "step": 27202 }, { "epoch": 0.9741973606460508, "grad_norm": 2.234874963760376, "learning_rate": 3.4851753685142265e-07, "loss": 1.637, "step": 27203 }, { "epoch": 0.974233172775619, "grad_norm": 1.872274398803711, "learning_rate": 3.4755066565351767e-07, "loss": 1.4049, "step": 27204 }, { "epoch": 0.9742689849051874, "grad_norm": 1.5312186479568481, "learning_rate": 3.465851351507787e-07, "loss": 1.2672, "step": 27205 }, { "epoch": 0.9743047970347557, "grad_norm": 1.6697921752929688, "learning_rate": 3.456209453561954e-07, "loss": 1.6174, "step": 27206 }, { "epoch": 0.974340609164324, "grad_norm": 1.3625354766845703, "learning_rate": 3.4465809628273504e-07, "loss": 0.9797, "step": 27207 }, { "epoch": 0.9743764212938922, "grad_norm": 1.308997631072998, "learning_rate": 3.4369658794335403e-07, "loss": 1.3414, "step": 27208 }, { "epoch": 0.9744122334234605, "grad_norm": 1.4477964639663696, "learning_rate": 3.4273642035099753e-07, "loss": 1.2255, "step": 27209 }, { "epoch": 0.9744480455530288, "grad_norm": 1.5210626125335693, "learning_rate": 3.417775935185663e-07, "loss": 1.4795, "step": 27210 }, { "epoch": 0.974483857682597, "grad_norm": 1.6373701095581055, "learning_rate": 3.408201074589612e-07, "loss": 1.6343, "step": 27211 }, { "epoch": 0.9745196698121654, "grad_norm": 1.727810263633728, "learning_rate": 3.39863962185083e-07, "loss": 1.565, "step": 27212 }, { "epoch": 0.9745554819417337, "grad_norm": 1.3922851085662842, "learning_rate": 3.3890915770977694e-07, "loss": 1.2344, "step": 27213 }, { "epoch": 0.974591294071302, "grad_norm": 1.5838536024093628, "learning_rate": 3.379556940458883e-07, "loss": 1.3403, "step": 27214 }, { "epoch": 0.9746271062008702, "grad_norm": 1.6230381727218628, "learning_rate": 3.3700357120626247e-07, "loss": 1.4572, "step": 27215 }, { "epoch": 0.9746629183304385, "grad_norm": 1.2696282863616943, "learning_rate": 3.360527892036891e-07, "loss": 1.6312, "step": 27216 }, { "epoch": 0.9746987304600068, "grad_norm": 1.652672529220581, "learning_rate": 3.351033480509691e-07, "loss": 1.4796, "step": 27217 }, { "epoch": 0.974734542589575, "grad_norm": 1.8868895769119263, "learning_rate": 3.3415524776088116e-07, "loss": 1.3838, "step": 27218 }, { "epoch": 0.9747703547191434, "grad_norm": 1.9636809825897217, "learning_rate": 3.332084883461706e-07, "loss": 1.42, "step": 27219 }, { "epoch": 0.9748061668487117, "grad_norm": 1.3785717487335205, "learning_rate": 3.3226306981957166e-07, "loss": 1.6404, "step": 27220 }, { "epoch": 0.97484197897828, "grad_norm": 1.4908267259597778, "learning_rate": 3.313189921938187e-07, "loss": 1.2639, "step": 27221 }, { "epoch": 0.9748777911078482, "grad_norm": 2.0432732105255127, "learning_rate": 3.3037625548160143e-07, "loss": 1.8091, "step": 27222 }, { "epoch": 0.9749136032374165, "grad_norm": 2.1694438457489014, "learning_rate": 3.294348596956098e-07, "loss": 1.3754, "step": 27223 }, { "epoch": 0.9749494153669848, "grad_norm": 1.6928741931915283, "learning_rate": 3.2849480484851145e-07, "loss": 1.213, "step": 27224 }, { "epoch": 0.974985227496553, "grad_norm": 1.616788625717163, "learning_rate": 3.275560909529407e-07, "loss": 1.5529, "step": 27225 }, { "epoch": 0.9750210396261214, "grad_norm": 1.5182127952575684, "learning_rate": 3.2661871802154296e-07, "loss": 1.4226, "step": 27226 }, { "epoch": 0.9750568517556897, "grad_norm": 1.6872198581695557, "learning_rate": 3.256826860669193e-07, "loss": 1.3533, "step": 27227 }, { "epoch": 0.975092663885258, "grad_norm": 2.066617012023926, "learning_rate": 3.2474799510165965e-07, "loss": 1.5938, "step": 27228 }, { "epoch": 0.9751284760148262, "grad_norm": 1.7410343885421753, "learning_rate": 3.238146451383428e-07, "loss": 1.4438, "step": 27229 }, { "epoch": 0.9751642881443945, "grad_norm": 1.7189652919769287, "learning_rate": 3.228826361895254e-07, "loss": 1.6799, "step": 27230 }, { "epoch": 0.9752001002739628, "grad_norm": 1.2681914567947388, "learning_rate": 3.21951968267753e-07, "loss": 1.3594, "step": 27231 }, { "epoch": 0.975235912403531, "grad_norm": 1.4830594062805176, "learning_rate": 3.2102264138553774e-07, "loss": 1.3071, "step": 27232 }, { "epoch": 0.9752717245330994, "grad_norm": 1.574739694595337, "learning_rate": 3.200946555553919e-07, "loss": 1.2429, "step": 27233 }, { "epoch": 0.9753075366626677, "grad_norm": 2.1808180809020996, "learning_rate": 3.191680107897943e-07, "loss": 1.2584, "step": 27234 }, { "epoch": 0.9753433487922359, "grad_norm": 1.646176815032959, "learning_rate": 3.1824270710121286e-07, "loss": 1.3929, "step": 27235 }, { "epoch": 0.9753791609218042, "grad_norm": 1.216214656829834, "learning_rate": 3.173187445020931e-07, "loss": 1.1705, "step": 27236 }, { "epoch": 0.9754149730513725, "grad_norm": 1.5510247945785522, "learning_rate": 3.1639612300485844e-07, "loss": 1.4461, "step": 27237 }, { "epoch": 0.9754507851809407, "grad_norm": 1.6966277360916138, "learning_rate": 3.1547484262194336e-07, "loss": 1.5542, "step": 27238 }, { "epoch": 0.975486597310509, "grad_norm": 2.0955851078033447, "learning_rate": 3.1455490336572693e-07, "loss": 1.1453, "step": 27239 }, { "epoch": 0.9755224094400774, "grad_norm": 1.7406843900680542, "learning_rate": 3.13636305248588e-07, "loss": 1.4391, "step": 27240 }, { "epoch": 0.9755582215696457, "grad_norm": 1.6218078136444092, "learning_rate": 3.1271904828288343e-07, "loss": 1.4322, "step": 27241 }, { "epoch": 0.9755940336992139, "grad_norm": 1.6305664777755737, "learning_rate": 3.1180313248097004e-07, "loss": 1.2418, "step": 27242 }, { "epoch": 0.9756298458287822, "grad_norm": 1.9506369829177856, "learning_rate": 3.10888557855149e-07, "loss": 1.5543, "step": 27243 }, { "epoch": 0.9756656579583505, "grad_norm": 1.932220697402954, "learning_rate": 3.099753244177217e-07, "loss": 1.4439, "step": 27244 }, { "epoch": 0.9757014700879187, "grad_norm": 1.4722483158111572, "learning_rate": 3.090634321810004e-07, "loss": 1.3055, "step": 27245 }, { "epoch": 0.975737282217487, "grad_norm": 2.48748779296875, "learning_rate": 3.0815288115723095e-07, "loss": 1.7239, "step": 27246 }, { "epoch": 0.9757730943470554, "grad_norm": 1.9032803773880005, "learning_rate": 3.0724367135868126e-07, "loss": 1.4195, "step": 27247 }, { "epoch": 0.9758089064766237, "grad_norm": 2.151289939880371, "learning_rate": 3.063358027975638e-07, "loss": 1.3273, "step": 27248 }, { "epoch": 0.9758447186061919, "grad_norm": 1.8209279775619507, "learning_rate": 3.054292754861021e-07, "loss": 1.611, "step": 27249 }, { "epoch": 0.9758805307357602, "grad_norm": 1.8769649267196655, "learning_rate": 3.0452408943649756e-07, "loss": 1.5656, "step": 27250 }, { "epoch": 0.9759163428653285, "grad_norm": 1.3402371406555176, "learning_rate": 3.0362024466092933e-07, "loss": 1.4473, "step": 27251 }, { "epoch": 0.9759521549948967, "grad_norm": 1.6566824913024902, "learning_rate": 3.0271774117153207e-07, "loss": 1.3018, "step": 27252 }, { "epoch": 0.975987967124465, "grad_norm": 1.9619542360305786, "learning_rate": 3.01816578980485e-07, "loss": 1.405, "step": 27253 }, { "epoch": 0.9760237792540334, "grad_norm": 1.8266921043395996, "learning_rate": 3.009167580998895e-07, "loss": 1.2748, "step": 27254 }, { "epoch": 0.9760595913836017, "grad_norm": 2.3018181324005127, "learning_rate": 3.0001827854184704e-07, "loss": 1.6089, "step": 27255 }, { "epoch": 0.9760954035131699, "grad_norm": 1.7781779766082764, "learning_rate": 2.9912114031847015e-07, "loss": 1.3481, "step": 27256 }, { "epoch": 0.9761312156427382, "grad_norm": 1.4521387815475464, "learning_rate": 2.9822534344180475e-07, "loss": 1.3735, "step": 27257 }, { "epoch": 0.9761670277723065, "grad_norm": 2.076186180114746, "learning_rate": 2.9733088792391894e-07, "loss": 1.0925, "step": 27258 }, { "epoch": 0.9762028399018747, "grad_norm": 1.6280510425567627, "learning_rate": 2.9643777377682535e-07, "loss": 1.4911, "step": 27259 }, { "epoch": 0.976238652031443, "grad_norm": 1.9344412088394165, "learning_rate": 2.955460010125699e-07, "loss": 1.3981, "step": 27260 }, { "epoch": 0.9762744641610114, "grad_norm": 1.447930932044983, "learning_rate": 2.946555696431208e-07, "loss": 1.571, "step": 27261 }, { "epoch": 0.9763102762905796, "grad_norm": 1.7467442750930786, "learning_rate": 2.9376647968047954e-07, "loss": 1.5898, "step": 27262 }, { "epoch": 0.9763460884201479, "grad_norm": 1.490739107131958, "learning_rate": 2.928787311365921e-07, "loss": 1.4754, "step": 27263 }, { "epoch": 0.9763819005497162, "grad_norm": 1.6217472553253174, "learning_rate": 2.919923240234046e-07, "loss": 1.4527, "step": 27264 }, { "epoch": 0.9764177126792845, "grad_norm": 1.551711916923523, "learning_rate": 2.911072583528518e-07, "loss": 1.3942, "step": 27265 }, { "epoch": 0.9764535248088527, "grad_norm": 1.4863595962524414, "learning_rate": 2.9022353413683534e-07, "loss": 1.371, "step": 27266 }, { "epoch": 0.976489336938421, "grad_norm": 1.9599124193191528, "learning_rate": 2.893411513872457e-07, "loss": 1.4827, "step": 27267 }, { "epoch": 0.9765251490679894, "grad_norm": 1.4526926279067993, "learning_rate": 2.884601101159512e-07, "loss": 1.252, "step": 27268 }, { "epoch": 0.9765609611975576, "grad_norm": 1.6005815267562866, "learning_rate": 2.87580410334809e-07, "loss": 1.4749, "step": 27269 }, { "epoch": 0.9765967733271259, "grad_norm": 1.9566835165023804, "learning_rate": 2.8670205205565406e-07, "loss": 1.6584, "step": 27270 }, { "epoch": 0.9766325854566942, "grad_norm": 1.6158761978149414, "learning_rate": 2.8582503529029916e-07, "loss": 1.0847, "step": 27271 }, { "epoch": 0.9766683975862624, "grad_norm": 1.6226449012756348, "learning_rate": 2.84949360050546e-07, "loss": 1.2105, "step": 27272 }, { "epoch": 0.9767042097158307, "grad_norm": 1.3435142040252686, "learning_rate": 2.8407502634817395e-07, "loss": 1.3259, "step": 27273 }, { "epoch": 0.976740021845399, "grad_norm": 1.224449872970581, "learning_rate": 2.8320203419495153e-07, "loss": 1.0536, "step": 27274 }, { "epoch": 0.9767758339749674, "grad_norm": 1.4458800554275513, "learning_rate": 2.8233038360262474e-07, "loss": 1.2663, "step": 27275 }, { "epoch": 0.9768116461045356, "grad_norm": 1.431313395500183, "learning_rate": 2.814600745829177e-07, "loss": 1.3087, "step": 27276 }, { "epoch": 0.9768474582341039, "grad_norm": 1.2949119806289673, "learning_rate": 2.805911071475209e-07, "loss": 1.3999, "step": 27277 }, { "epoch": 0.9768832703636722, "grad_norm": 1.5865280628204346, "learning_rate": 2.797234813081584e-07, "loss": 1.1732, "step": 27278 }, { "epoch": 0.9769190824932404, "grad_norm": 1.7895091772079468, "learning_rate": 2.788571970764764e-07, "loss": 1.3668, "step": 27279 }, { "epoch": 0.9769548946228087, "grad_norm": 1.5917913913726807, "learning_rate": 2.7799225446414334e-07, "loss": 1.3931, "step": 27280 }, { "epoch": 0.976990706752377, "grad_norm": 1.47417414188385, "learning_rate": 2.7712865348279436e-07, "loss": 1.3361, "step": 27281 }, { "epoch": 0.9770265188819454, "grad_norm": 1.7814379930496216, "learning_rate": 2.762663941440424e-07, "loss": 1.6861, "step": 27282 }, { "epoch": 0.9770623310115136, "grad_norm": 1.5276187658309937, "learning_rate": 2.7540547645950045e-07, "loss": 1.3426, "step": 27283 }, { "epoch": 0.9770981431410819, "grad_norm": 1.551092505455017, "learning_rate": 2.745459004407369e-07, "loss": 1.2497, "step": 27284 }, { "epoch": 0.9771339552706502, "grad_norm": 1.6632046699523926, "learning_rate": 2.736876660993204e-07, "loss": 1.4969, "step": 27285 }, { "epoch": 0.9771697674002184, "grad_norm": 1.940461277961731, "learning_rate": 2.728307734467972e-07, "loss": 1.3537, "step": 27286 }, { "epoch": 0.9772055795297867, "grad_norm": 2.302395820617676, "learning_rate": 2.719752224947025e-07, "loss": 1.3458, "step": 27287 }, { "epoch": 0.977241391659355, "grad_norm": 1.845481038093567, "learning_rate": 2.711210132545383e-07, "loss": 1.3436, "step": 27288 }, { "epoch": 0.9772772037889234, "grad_norm": 1.4163119792938232, "learning_rate": 2.7026814573779534e-07, "loss": 1.3536, "step": 27289 }, { "epoch": 0.9773130159184916, "grad_norm": 1.6288304328918457, "learning_rate": 2.6941661995596446e-07, "loss": 1.4439, "step": 27290 }, { "epoch": 0.9773488280480599, "grad_norm": 1.5382457971572876, "learning_rate": 2.6856643592048093e-07, "loss": 1.5094, "step": 27291 }, { "epoch": 0.9773846401776282, "grad_norm": 1.2881473302841187, "learning_rate": 2.6771759364279114e-07, "loss": 1.5564, "step": 27292 }, { "epoch": 0.9774204523071964, "grad_norm": 1.9076626300811768, "learning_rate": 2.668700931343082e-07, "loss": 1.5091, "step": 27293 }, { "epoch": 0.9774562644367647, "grad_norm": 2.171661376953125, "learning_rate": 2.6602393440645636e-07, "loss": 1.4619, "step": 27294 }, { "epoch": 0.977492076566333, "grad_norm": 1.776262879371643, "learning_rate": 2.651791174706042e-07, "loss": 1.5386, "step": 27295 }, { "epoch": 0.9775278886959013, "grad_norm": 1.5984774827957153, "learning_rate": 2.6433564233810936e-07, "loss": 1.5978, "step": 27296 }, { "epoch": 0.9775637008254696, "grad_norm": 1.9452379941940308, "learning_rate": 2.6349350902032944e-07, "loss": 1.668, "step": 27297 }, { "epoch": 0.9775995129550379, "grad_norm": 1.268587589263916, "learning_rate": 2.6265271752859975e-07, "loss": 1.3742, "step": 27298 }, { "epoch": 0.9776353250846062, "grad_norm": 1.4393203258514404, "learning_rate": 2.618132678742224e-07, "loss": 1.2159, "step": 27299 }, { "epoch": 0.9776711372141744, "grad_norm": 1.389753818511963, "learning_rate": 2.609751600684995e-07, "loss": 1.1791, "step": 27300 }, { "epoch": 0.9777069493437427, "grad_norm": 1.9547994136810303, "learning_rate": 2.601383941226998e-07, "loss": 1.4618, "step": 27301 }, { "epoch": 0.977742761473311, "grad_norm": 1.6610933542251587, "learning_rate": 2.593029700480698e-07, "loss": 1.6401, "step": 27302 }, { "epoch": 0.9777785736028793, "grad_norm": 2.492784261703491, "learning_rate": 2.584688878558783e-07, "loss": 1.4826, "step": 27303 }, { "epoch": 0.9778143857324476, "grad_norm": 1.273895263671875, "learning_rate": 2.576361475573275e-07, "loss": 1.4765, "step": 27304 }, { "epoch": 0.9778501978620159, "grad_norm": 1.622200846672058, "learning_rate": 2.568047491636194e-07, "loss": 1.1659, "step": 27305 }, { "epoch": 0.9778860099915841, "grad_norm": 1.9652822017669678, "learning_rate": 2.559746926859452e-07, "loss": 1.4729, "step": 27306 }, { "epoch": 0.9779218221211524, "grad_norm": 1.710400938987732, "learning_rate": 2.551459781354737e-07, "loss": 1.716, "step": 27307 }, { "epoch": 0.9779576342507207, "grad_norm": 2.0867536067962646, "learning_rate": 2.543186055233515e-07, "loss": 1.2675, "step": 27308 }, { "epoch": 0.977993446380289, "grad_norm": 1.5373716354370117, "learning_rate": 2.534925748607031e-07, "loss": 1.2282, "step": 27309 }, { "epoch": 0.9780292585098573, "grad_norm": 1.6054054498672485, "learning_rate": 2.526678861586529e-07, "loss": 1.6797, "step": 27310 }, { "epoch": 0.9780650706394256, "grad_norm": 1.2186475992202759, "learning_rate": 2.51844539428292e-07, "loss": 1.5985, "step": 27311 }, { "epoch": 0.9781008827689939, "grad_norm": 1.5311107635498047, "learning_rate": 2.5102253468070047e-07, "loss": 1.2037, "step": 27312 }, { "epoch": 0.9781366948985621, "grad_norm": 2.0063748359680176, "learning_rate": 2.50201871926925e-07, "loss": 1.2174, "step": 27313 }, { "epoch": 0.9781725070281304, "grad_norm": 1.6210482120513916, "learning_rate": 2.4938255117802347e-07, "loss": 1.3572, "step": 27314 }, { "epoch": 0.9782083191576987, "grad_norm": 1.8170969486236572, "learning_rate": 2.485645724450092e-07, "loss": 1.2536, "step": 27315 }, { "epoch": 0.978244131287267, "grad_norm": 1.7604585886001587, "learning_rate": 2.4774793573888453e-07, "loss": 1.336, "step": 27316 }, { "epoch": 0.9782799434168353, "grad_norm": 1.353798747062683, "learning_rate": 2.4693264107064075e-07, "loss": 1.3545, "step": 27317 }, { "epoch": 0.9783157555464036, "grad_norm": 1.6163965463638306, "learning_rate": 2.4611868845124673e-07, "loss": 1.1518, "step": 27318 }, { "epoch": 0.9783515676759719, "grad_norm": 1.461172342300415, "learning_rate": 2.453060778916605e-07, "loss": 1.5765, "step": 27319 }, { "epoch": 0.9783873798055401, "grad_norm": 1.6993408203125, "learning_rate": 2.4449480940279547e-07, "loss": 1.5854, "step": 27320 }, { "epoch": 0.9784231919351084, "grad_norm": 1.431376576423645, "learning_rate": 2.436848829955762e-07, "loss": 1.1842, "step": 27321 }, { "epoch": 0.9784590040646767, "grad_norm": 2.1553871631622314, "learning_rate": 2.428762986809052e-07, "loss": 1.4902, "step": 27322 }, { "epoch": 0.9784948161942449, "grad_norm": 1.702622652053833, "learning_rate": 2.420690564696626e-07, "loss": 1.5358, "step": 27323 }, { "epoch": 0.9785306283238133, "grad_norm": 1.2670756578445435, "learning_rate": 2.4126315637269523e-07, "loss": 0.9199, "step": 27324 }, { "epoch": 0.9785664404533816, "grad_norm": 1.6474324464797974, "learning_rate": 2.4045859840085005e-07, "loss": 1.3944, "step": 27325 }, { "epoch": 0.9786022525829499, "grad_norm": 1.6244606971740723, "learning_rate": 2.3965538256496276e-07, "loss": 1.1658, "step": 27326 }, { "epoch": 0.9786380647125181, "grad_norm": 1.3171576261520386, "learning_rate": 2.388535088758248e-07, "loss": 1.4406, "step": 27327 }, { "epoch": 0.9786738768420864, "grad_norm": 1.325938105583191, "learning_rate": 2.3805297734422748e-07, "loss": 1.3195, "step": 27328 }, { "epoch": 0.9787096889716547, "grad_norm": 1.6224009990692139, "learning_rate": 2.3725378798095112e-07, "loss": 1.7065, "step": 27329 }, { "epoch": 0.9787455011012229, "grad_norm": 1.4370075464248657, "learning_rate": 2.364559407967426e-07, "loss": 1.4062, "step": 27330 }, { "epoch": 0.9787813132307913, "grad_norm": 1.7838104963302612, "learning_rate": 2.3565943580232676e-07, "loss": 1.6156, "step": 27331 }, { "epoch": 0.9788171253603596, "grad_norm": 1.8117674589157104, "learning_rate": 2.3486427300841717e-07, "loss": 1.8238, "step": 27332 }, { "epoch": 0.9788529374899279, "grad_norm": 1.5283007621765137, "learning_rate": 2.3407045242573867e-07, "loss": 1.1369, "step": 27333 }, { "epoch": 0.9788887496194961, "grad_norm": 1.715983510017395, "learning_rate": 2.3327797406494934e-07, "loss": 1.3713, "step": 27334 }, { "epoch": 0.9789245617490644, "grad_norm": 1.5859873294830322, "learning_rate": 2.3248683793670735e-07, "loss": 1.4738, "step": 27335 }, { "epoch": 0.9789603738786327, "grad_norm": 1.5380550622940063, "learning_rate": 2.316970440516708e-07, "loss": 1.3575, "step": 27336 }, { "epoch": 0.9789961860082009, "grad_norm": 1.4396132230758667, "learning_rate": 2.309085924204535e-07, "loss": 1.3689, "step": 27337 }, { "epoch": 0.9790319981377693, "grad_norm": 1.9657173156738281, "learning_rate": 2.301214830536691e-07, "loss": 1.7597, "step": 27338 }, { "epoch": 0.9790678102673376, "grad_norm": 2.931874990463257, "learning_rate": 2.2933571596190918e-07, "loss": 1.9935, "step": 27339 }, { "epoch": 0.9791036223969058, "grad_norm": 1.8927358388900757, "learning_rate": 2.2855129115574304e-07, "loss": 1.2429, "step": 27340 }, { "epoch": 0.9791394345264741, "grad_norm": 1.5039610862731934, "learning_rate": 2.277682086457289e-07, "loss": 1.6117, "step": 27341 }, { "epoch": 0.9791752466560424, "grad_norm": 1.5220850706100464, "learning_rate": 2.269864684423917e-07, "loss": 1.2156, "step": 27342 }, { "epoch": 0.9792110587856107, "grad_norm": 1.7066388130187988, "learning_rate": 2.262060705562452e-07, "loss": 1.4429, "step": 27343 }, { "epoch": 0.9792468709151789, "grad_norm": 1.3088314533233643, "learning_rate": 2.2542701499780327e-07, "loss": 1.3944, "step": 27344 }, { "epoch": 0.9792826830447473, "grad_norm": 2.0127079486846924, "learning_rate": 2.246493017775353e-07, "loss": 1.0906, "step": 27345 }, { "epoch": 0.9793184951743156, "grad_norm": 1.8624136447906494, "learning_rate": 2.2387293090592177e-07, "loss": 1.7316, "step": 27346 }, { "epoch": 0.9793543073038838, "grad_norm": 2.0756258964538574, "learning_rate": 2.230979023933877e-07, "loss": 1.341, "step": 27347 }, { "epoch": 0.9793901194334521, "grad_norm": 1.8070365190505981, "learning_rate": 2.2232421625036914e-07, "loss": 1.7985, "step": 27348 }, { "epoch": 0.9794259315630204, "grad_norm": 1.252737283706665, "learning_rate": 2.2155187248728004e-07, "loss": 1.3545, "step": 27349 }, { "epoch": 0.9794617436925886, "grad_norm": 1.4469175338745117, "learning_rate": 2.2078087111450097e-07, "loss": 1.5457, "step": 27350 }, { "epoch": 0.9794975558221569, "grad_norm": 1.5265394449234009, "learning_rate": 2.200112121424125e-07, "loss": 1.4557, "step": 27351 }, { "epoch": 0.9795333679517253, "grad_norm": 1.837325096130371, "learning_rate": 2.192428955813619e-07, "loss": 1.3282, "step": 27352 }, { "epoch": 0.9795691800812936, "grad_norm": 2.001613140106201, "learning_rate": 2.184759214416854e-07, "loss": 1.3861, "step": 27353 }, { "epoch": 0.9796049922108618, "grad_norm": 2.0825603008270264, "learning_rate": 2.177102897337191e-07, "loss": 1.5685, "step": 27354 }, { "epoch": 0.9796408043404301, "grad_norm": 1.7808622121810913, "learning_rate": 2.169460004677437e-07, "loss": 1.4419, "step": 27355 }, { "epoch": 0.9796766164699984, "grad_norm": 1.5627222061157227, "learning_rate": 2.16183053654051e-07, "loss": 1.3342, "step": 27356 }, { "epoch": 0.9797124285995666, "grad_norm": 1.7511463165283203, "learning_rate": 2.1542144930289943e-07, "loss": 1.1166, "step": 27357 }, { "epoch": 0.9797482407291349, "grad_norm": 1.368848204612732, "learning_rate": 2.1466118742453634e-07, "loss": 1.4296, "step": 27358 }, { "epoch": 0.9797840528587033, "grad_norm": 1.8091198205947876, "learning_rate": 2.139022680292091e-07, "loss": 1.3872, "step": 27359 }, { "epoch": 0.9798198649882716, "grad_norm": 1.5368421077728271, "learning_rate": 2.1314469112709844e-07, "loss": 1.1766, "step": 27360 }, { "epoch": 0.9798556771178398, "grad_norm": 1.9813166856765747, "learning_rate": 2.1238845672841845e-07, "loss": 1.4277, "step": 27361 }, { "epoch": 0.9798914892474081, "grad_norm": 2.2372472286224365, "learning_rate": 2.1163356484332764e-07, "loss": 1.2097, "step": 27362 }, { "epoch": 0.9799273013769764, "grad_norm": 1.3528220653533936, "learning_rate": 2.1088001548199565e-07, "loss": 1.6458, "step": 27363 }, { "epoch": 0.9799631135065446, "grad_norm": 1.6533170938491821, "learning_rate": 2.1012780865454773e-07, "loss": 1.089, "step": 27364 }, { "epoch": 0.9799989256361129, "grad_norm": 1.8356963396072388, "learning_rate": 2.093769443711091e-07, "loss": 1.4844, "step": 27365 }, { "epoch": 0.9800347377656813, "grad_norm": 1.6935428380966187, "learning_rate": 2.086274226417939e-07, "loss": 1.5054, "step": 27366 }, { "epoch": 0.9800705498952496, "grad_norm": 1.528594732284546, "learning_rate": 2.0787924347666076e-07, "loss": 1.5082, "step": 27367 }, { "epoch": 0.9801063620248178, "grad_norm": 1.4918920993804932, "learning_rate": 2.071324068858016e-07, "loss": 1.2325, "step": 27368 }, { "epoch": 0.9801421741543861, "grad_norm": 1.6513475179672241, "learning_rate": 2.0638691287925282e-07, "loss": 1.7074, "step": 27369 }, { "epoch": 0.9801779862839544, "grad_norm": 1.2635741233825684, "learning_rate": 2.0564276146703977e-07, "loss": 1.2669, "step": 27370 }, { "epoch": 0.9802137984135226, "grad_norm": 2.1320393085479736, "learning_rate": 2.048999526591766e-07, "loss": 1.4007, "step": 27371 }, { "epoch": 0.9802496105430909, "grad_norm": 1.2138698101043701, "learning_rate": 2.041584864656554e-07, "loss": 1.3064, "step": 27372 }, { "epoch": 0.9802854226726593, "grad_norm": 1.551811695098877, "learning_rate": 2.03418362896457e-07, "loss": 1.4349, "step": 27373 }, { "epoch": 0.9803212348022275, "grad_norm": 1.5845892429351807, "learning_rate": 2.0267958196154013e-07, "loss": 1.3266, "step": 27374 }, { "epoch": 0.9803570469317958, "grad_norm": 1.8183881044387817, "learning_rate": 2.019421436708413e-07, "loss": 1.7351, "step": 27375 }, { "epoch": 0.9803928590613641, "grad_norm": 1.50227952003479, "learning_rate": 2.012060480342748e-07, "loss": 1.6177, "step": 27376 }, { "epoch": 0.9804286711909324, "grad_norm": 1.61687171459198, "learning_rate": 2.0047129506175488e-07, "loss": 1.2517, "step": 27377 }, { "epoch": 0.9804644833205006, "grad_norm": 1.2359267473220825, "learning_rate": 1.9973788476315147e-07, "loss": 1.3682, "step": 27378 }, { "epoch": 0.9805002954500689, "grad_norm": 1.4068397283554077, "learning_rate": 1.9900581714835666e-07, "loss": 1.4976, "step": 27379 }, { "epoch": 0.9805361075796373, "grad_norm": 1.6050434112548828, "learning_rate": 1.9827509222719587e-07, "loss": 1.3544, "step": 27380 }, { "epoch": 0.9805719197092055, "grad_norm": 1.5304006338119507, "learning_rate": 1.9754571000950572e-07, "loss": 1.4646, "step": 27381 }, { "epoch": 0.9806077318387738, "grad_norm": 1.6975919008255005, "learning_rate": 1.968176705051117e-07, "loss": 1.4701, "step": 27382 }, { "epoch": 0.9806435439683421, "grad_norm": 1.4890451431274414, "learning_rate": 1.960909737237837e-07, "loss": 0.9229, "step": 27383 }, { "epoch": 0.9806793560979103, "grad_norm": 1.4120088815689087, "learning_rate": 1.9536561967532507e-07, "loss": 1.7422, "step": 27384 }, { "epoch": 0.9807151682274786, "grad_norm": 1.6301904916763306, "learning_rate": 1.9464160836948354e-07, "loss": 1.2318, "step": 27385 }, { "epoch": 0.9807509803570469, "grad_norm": 1.9923317432403564, "learning_rate": 1.9391893981599575e-07, "loss": 1.6143, "step": 27386 }, { "epoch": 0.9807867924866153, "grad_norm": 1.7062000036239624, "learning_rate": 1.9319761402458726e-07, "loss": 1.5476, "step": 27387 }, { "epoch": 0.9808226046161835, "grad_norm": 2.0269036293029785, "learning_rate": 1.924776310049725e-07, "loss": 1.3775, "step": 27388 }, { "epoch": 0.9808584167457518, "grad_norm": 2.484912395477295, "learning_rate": 1.9175899076682158e-07, "loss": 1.5035, "step": 27389 }, { "epoch": 0.9808942288753201, "grad_norm": 1.5835357904434204, "learning_rate": 1.9104169331981558e-07, "loss": 1.264, "step": 27390 }, { "epoch": 0.9809300410048883, "grad_norm": 1.6880030632019043, "learning_rate": 1.9032573867359126e-07, "loss": 1.5247, "step": 27391 }, { "epoch": 0.9809658531344566, "grad_norm": 1.7247785329818726, "learning_rate": 1.8961112683778536e-07, "loss": 1.5215, "step": 27392 }, { "epoch": 0.9810016652640249, "grad_norm": 1.686508297920227, "learning_rate": 1.8889785782202352e-07, "loss": 1.4663, "step": 27393 }, { "epoch": 0.9810374773935933, "grad_norm": 1.2703536748886108, "learning_rate": 1.8818593163589805e-07, "loss": 1.281, "step": 27394 }, { "epoch": 0.9810732895231615, "grad_norm": 2.6047658920288086, "learning_rate": 1.8747534828897905e-07, "loss": 1.5477, "step": 27395 }, { "epoch": 0.9811091016527298, "grad_norm": 1.490902304649353, "learning_rate": 1.867661077908256e-07, "loss": 1.2813, "step": 27396 }, { "epoch": 0.9811449137822981, "grad_norm": 1.706141710281372, "learning_rate": 1.8605821015098556e-07, "loss": 1.3684, "step": 27397 }, { "epoch": 0.9811807259118663, "grad_norm": 3.6071617603302, "learning_rate": 1.853516553789847e-07, "loss": 1.553, "step": 27398 }, { "epoch": 0.9812165380414346, "grad_norm": 1.1728358268737793, "learning_rate": 1.8464644348432647e-07, "loss": 1.2748, "step": 27399 }, { "epoch": 0.9812523501710029, "grad_norm": 1.6108249425888062, "learning_rate": 1.8394257447650332e-07, "loss": 1.4349, "step": 27400 }, { "epoch": 0.9812881623005713, "grad_norm": 1.6908518075942993, "learning_rate": 1.832400483649632e-07, "loss": 1.6162, "step": 27401 }, { "epoch": 0.9813239744301395, "grad_norm": 1.55814528465271, "learning_rate": 1.825388651591875e-07, "loss": 1.6483, "step": 27402 }, { "epoch": 0.9813597865597078, "grad_norm": 1.2762830257415771, "learning_rate": 1.8183902486859083e-07, "loss": 1.5125, "step": 27403 }, { "epoch": 0.9813955986892761, "grad_norm": 1.6804869174957275, "learning_rate": 1.8114052750259902e-07, "loss": 1.4573, "step": 27404 }, { "epoch": 0.9814314108188443, "grad_norm": 1.4806815385818481, "learning_rate": 1.8044337307059345e-07, "loss": 1.2894, "step": 27405 }, { "epoch": 0.9814672229484126, "grad_norm": 1.408532738685608, "learning_rate": 1.7974756158196658e-07, "loss": 1.4177, "step": 27406 }, { "epoch": 0.9815030350779809, "grad_norm": 1.4657418727874756, "learning_rate": 1.7905309304608876e-07, "loss": 1.3174, "step": 27407 }, { "epoch": 0.9815388472075492, "grad_norm": 1.338862657546997, "learning_rate": 1.7835996747228578e-07, "loss": 1.3876, "step": 27408 }, { "epoch": 0.9815746593371175, "grad_norm": 1.70529305934906, "learning_rate": 1.7766818486988357e-07, "loss": 1.5092, "step": 27409 }, { "epoch": 0.9816104714666858, "grad_norm": 1.8063136339187622, "learning_rate": 1.769777452481969e-07, "loss": 1.3785, "step": 27410 }, { "epoch": 0.981646283596254, "grad_norm": 1.7511066198349, "learning_rate": 1.7628864861651827e-07, "loss": 1.334, "step": 27411 }, { "epoch": 0.9816820957258223, "grad_norm": 2.083026647567749, "learning_rate": 1.7560089498410704e-07, "loss": 1.3795, "step": 27412 }, { "epoch": 0.9817179078553906, "grad_norm": 1.639681339263916, "learning_rate": 1.749144843602224e-07, "loss": 1.3635, "step": 27413 }, { "epoch": 0.9817537199849589, "grad_norm": 1.5684409141540527, "learning_rate": 1.7422941675410143e-07, "loss": 1.3592, "step": 27414 }, { "epoch": 0.9817895321145272, "grad_norm": 2.3734753131866455, "learning_rate": 1.7354569217494788e-07, "loss": 1.4517, "step": 27415 }, { "epoch": 0.9818253442440955, "grad_norm": 1.4742026329040527, "learning_rate": 1.728633106319766e-07, "loss": 1.4555, "step": 27416 }, { "epoch": 0.9818611563736638, "grad_norm": 1.7053145170211792, "learning_rate": 1.721822721343691e-07, "loss": 1.4405, "step": 27417 }, { "epoch": 0.981896968503232, "grad_norm": 1.586531400680542, "learning_rate": 1.7150257669127367e-07, "loss": 1.1004, "step": 27418 }, { "epoch": 0.9819327806328003, "grad_norm": 1.7251858711242676, "learning_rate": 1.7082422431183853e-07, "loss": 1.5102, "step": 27419 }, { "epoch": 0.9819685927623686, "grad_norm": 1.6602956056594849, "learning_rate": 1.701472150051897e-07, "loss": 1.6386, "step": 27420 }, { "epoch": 0.9820044048919369, "grad_norm": 1.4878696203231812, "learning_rate": 1.6947154878045324e-07, "loss": 1.6375, "step": 27421 }, { "epoch": 0.9820402170215052, "grad_norm": 2.1307990550994873, "learning_rate": 1.6879722564669964e-07, "loss": 1.5758, "step": 27422 }, { "epoch": 0.9820760291510735, "grad_norm": 1.5953096151351929, "learning_rate": 1.6812424561299943e-07, "loss": 1.3952, "step": 27423 }, { "epoch": 0.9821118412806418, "grad_norm": 1.6359837055206299, "learning_rate": 1.6745260868841207e-07, "loss": 1.5067, "step": 27424 }, { "epoch": 0.98214765341021, "grad_norm": 1.5047430992126465, "learning_rate": 1.667823148819858e-07, "loss": 1.2965, "step": 27425 }, { "epoch": 0.9821834655397783, "grad_norm": 1.7252825498580933, "learning_rate": 1.661133642027246e-07, "loss": 1.1945, "step": 27426 }, { "epoch": 0.9822192776693466, "grad_norm": 1.6850330829620361, "learning_rate": 1.6544575665963236e-07, "loss": 1.2893, "step": 27427 }, { "epoch": 0.9822550897989148, "grad_norm": 1.569167971611023, "learning_rate": 1.6477949226167967e-07, "loss": 1.4836, "step": 27428 }, { "epoch": 0.9822909019284832, "grad_norm": 1.9602831602096558, "learning_rate": 1.6411457101784822e-07, "loss": 1.337, "step": 27429 }, { "epoch": 0.9823267140580515, "grad_norm": 1.4004954099655151, "learning_rate": 1.6345099293708644e-07, "loss": 1.324, "step": 27430 }, { "epoch": 0.9823625261876198, "grad_norm": 3.534090042114258, "learning_rate": 1.627887580282983e-07, "loss": 1.4684, "step": 27431 }, { "epoch": 0.982398338317188, "grad_norm": 1.5962355136871338, "learning_rate": 1.6212786630041e-07, "loss": 1.2013, "step": 27432 }, { "epoch": 0.9824341504467563, "grad_norm": 1.6603145599365234, "learning_rate": 1.6146831776231442e-07, "loss": 1.3004, "step": 27433 }, { "epoch": 0.9824699625763246, "grad_norm": 1.6587244272232056, "learning_rate": 1.6081011242287115e-07, "loss": 1.5779, "step": 27434 }, { "epoch": 0.9825057747058928, "grad_norm": 1.366642951965332, "learning_rate": 1.6015325029095084e-07, "loss": 1.3437, "step": 27435 }, { "epoch": 0.9825415868354612, "grad_norm": 1.6663930416107178, "learning_rate": 1.5949773137537982e-07, "loss": 1.0643, "step": 27436 }, { "epoch": 0.9825773989650295, "grad_norm": 1.70339834690094, "learning_rate": 1.588435556849843e-07, "loss": 1.2619, "step": 27437 }, { "epoch": 0.9826132110945978, "grad_norm": 1.9648959636688232, "learning_rate": 1.5819072322856842e-07, "loss": 1.2753, "step": 27438 }, { "epoch": 0.982649023224166, "grad_norm": 1.460438847541809, "learning_rate": 1.575392340149029e-07, "loss": 1.3353, "step": 27439 }, { "epoch": 0.9826848353537343, "grad_norm": 1.61161208152771, "learning_rate": 1.5688908805275848e-07, "loss": 1.6739, "step": 27440 }, { "epoch": 0.9827206474833026, "grad_norm": 1.4673449993133545, "learning_rate": 1.5624028535088375e-07, "loss": 1.4056, "step": 27441 }, { "epoch": 0.9827564596128708, "grad_norm": 1.7691820859909058, "learning_rate": 1.5559282591801617e-07, "loss": 1.679, "step": 27442 }, { "epoch": 0.9827922717424392, "grad_norm": 1.7717576026916504, "learning_rate": 1.5494670976284875e-07, "loss": 1.574, "step": 27443 }, { "epoch": 0.9828280838720075, "grad_norm": 1.412382960319519, "learning_rate": 1.543019368940857e-07, "loss": 1.3202, "step": 27444 }, { "epoch": 0.9828638960015758, "grad_norm": 1.745245099067688, "learning_rate": 1.5365850732039778e-07, "loss": 1.6919, "step": 27445 }, { "epoch": 0.982899708131144, "grad_norm": 1.7831408977508545, "learning_rate": 1.5301642105043368e-07, "loss": 1.3777, "step": 27446 }, { "epoch": 0.9829355202607123, "grad_norm": 1.3212556838989258, "learning_rate": 1.5237567809285314e-07, "loss": 1.1704, "step": 27447 }, { "epoch": 0.9829713323902806, "grad_norm": 1.4446337223052979, "learning_rate": 1.5173627845624927e-07, "loss": 1.2402, "step": 27448 }, { "epoch": 0.9830071445198488, "grad_norm": 1.6273493766784668, "learning_rate": 1.510982221492485e-07, "loss": 1.616, "step": 27449 }, { "epoch": 0.9830429566494172, "grad_norm": 1.3335165977478027, "learning_rate": 1.5046150918042178e-07, "loss": 1.4248, "step": 27450 }, { "epoch": 0.9830787687789855, "grad_norm": 1.4034610986709595, "learning_rate": 1.4982613955834001e-07, "loss": 1.2438, "step": 27451 }, { "epoch": 0.9831145809085537, "grad_norm": 1.4101903438568115, "learning_rate": 1.4919211329156302e-07, "loss": 1.3619, "step": 27452 }, { "epoch": 0.983150393038122, "grad_norm": 2.1397769451141357, "learning_rate": 1.4855943038858399e-07, "loss": 1.52, "step": 27453 }, { "epoch": 0.9831862051676903, "grad_norm": 1.4390918016433716, "learning_rate": 1.4792809085795166e-07, "loss": 1.2698, "step": 27454 }, { "epoch": 0.9832220172972586, "grad_norm": 1.4676564931869507, "learning_rate": 1.4729809470814815e-07, "loss": 1.3267, "step": 27455 }, { "epoch": 0.9832578294268268, "grad_norm": 2.3807873725891113, "learning_rate": 1.4666944194764443e-07, "loss": 1.358, "step": 27456 }, { "epoch": 0.9832936415563952, "grad_norm": 1.7312041521072388, "learning_rate": 1.4604213258491152e-07, "loss": 1.5644, "step": 27457 }, { "epoch": 0.9833294536859635, "grad_norm": 1.8110259771347046, "learning_rate": 1.4541616662836488e-07, "loss": 1.3413, "step": 27458 }, { "epoch": 0.9833652658155317, "grad_norm": 1.3456915616989136, "learning_rate": 1.4479154408645335e-07, "loss": 1.2207, "step": 27459 }, { "epoch": 0.9834010779451, "grad_norm": 1.6277025938034058, "learning_rate": 1.441682649675591e-07, "loss": 1.4059, "step": 27460 }, { "epoch": 0.9834368900746683, "grad_norm": 1.6478482484817505, "learning_rate": 1.435463292800754e-07, "loss": 1.4681, "step": 27461 }, { "epoch": 0.9834727022042365, "grad_norm": 2.0339913368225098, "learning_rate": 1.4292573703237333e-07, "loss": 1.4551, "step": 27462 }, { "epoch": 0.9835085143338048, "grad_norm": 2.6009304523468018, "learning_rate": 1.423064882328018e-07, "loss": 1.6065, "step": 27463 }, { "epoch": 0.9835443264633732, "grad_norm": 2.1348941326141357, "learning_rate": 1.4168858288968745e-07, "loss": 1.4424, "step": 27464 }, { "epoch": 0.9835801385929415, "grad_norm": 2.100189208984375, "learning_rate": 1.4107202101134588e-07, "loss": 1.8112, "step": 27465 }, { "epoch": 0.9836159507225097, "grad_norm": 1.6087077856063843, "learning_rate": 1.404568026060704e-07, "loss": 1.2046, "step": 27466 }, { "epoch": 0.983651762852078, "grad_norm": 1.3044302463531494, "learning_rate": 1.3984292768213225e-07, "loss": 1.7624, "step": 27467 }, { "epoch": 0.9836875749816463, "grad_norm": 1.8005906343460083, "learning_rate": 1.3923039624780255e-07, "loss": 1.6117, "step": 27468 }, { "epoch": 0.9837233871112145, "grad_norm": 1.8799329996109009, "learning_rate": 1.3861920831131914e-07, "loss": 1.4647, "step": 27469 }, { "epoch": 0.9837591992407828, "grad_norm": 1.3361802101135254, "learning_rate": 1.380093638808977e-07, "loss": 1.2524, "step": 27470 }, { "epoch": 0.9837950113703512, "grad_norm": 1.5945254564285278, "learning_rate": 1.3740086296475385e-07, "loss": 1.6172, "step": 27471 }, { "epoch": 0.9838308234999195, "grad_norm": 1.4752191305160522, "learning_rate": 1.3679370557106997e-07, "loss": 1.24, "step": 27472 }, { "epoch": 0.9838666356294877, "grad_norm": 1.4537187814712524, "learning_rate": 1.3618789170800618e-07, "loss": 1.3304, "step": 27473 }, { "epoch": 0.983902447759056, "grad_norm": 1.3634134531021118, "learning_rate": 1.355834213837226e-07, "loss": 1.4036, "step": 27474 }, { "epoch": 0.9839382598886243, "grad_norm": 1.8052500486373901, "learning_rate": 1.349802946063461e-07, "loss": 1.4053, "step": 27475 }, { "epoch": 0.9839740720181925, "grad_norm": 1.618231177330017, "learning_rate": 1.3437851138399237e-07, "loss": 1.4643, "step": 27476 }, { "epoch": 0.9840098841477608, "grad_norm": 1.6881462335586548, "learning_rate": 1.33778071724755e-07, "loss": 1.4012, "step": 27477 }, { "epoch": 0.9840456962773292, "grad_norm": 1.7199472188949585, "learning_rate": 1.3317897563671633e-07, "loss": 1.4276, "step": 27478 }, { "epoch": 0.9840815084068975, "grad_norm": 1.4663841724395752, "learning_rate": 1.3258122312793663e-07, "loss": 1.4161, "step": 27479 }, { "epoch": 0.9841173205364657, "grad_norm": 1.3081108331680298, "learning_rate": 1.3198481420646504e-07, "loss": 0.9065, "step": 27480 }, { "epoch": 0.984153132666034, "grad_norm": 1.7379041910171509, "learning_rate": 1.313897488803062e-07, "loss": 1.4373, "step": 27481 }, { "epoch": 0.9841889447956023, "grad_norm": 1.5058616399765015, "learning_rate": 1.3079602715748706e-07, "loss": 1.3416, "step": 27482 }, { "epoch": 0.9842247569251705, "grad_norm": 1.4991620779037476, "learning_rate": 1.3020364904597903e-07, "loss": 1.4064, "step": 27483 }, { "epoch": 0.9842605690547388, "grad_norm": 2.0297024250030518, "learning_rate": 1.296126145537646e-07, "loss": 1.5304, "step": 27484 }, { "epoch": 0.9842963811843072, "grad_norm": 1.7815202474594116, "learning_rate": 1.2902292368878188e-07, "loss": 1.3314, "step": 27485 }, { "epoch": 0.9843321933138754, "grad_norm": 1.5582129955291748, "learning_rate": 1.2843457645896895e-07, "loss": 1.2736, "step": 27486 }, { "epoch": 0.9843680054434437, "grad_norm": 1.263076901435852, "learning_rate": 1.278475728722528e-07, "loss": 1.4625, "step": 27487 }, { "epoch": 0.984403817573012, "grad_norm": 2.047459602355957, "learning_rate": 1.27261912936516e-07, "loss": 1.7214, "step": 27488 }, { "epoch": 0.9844396297025803, "grad_norm": 1.9874540567398071, "learning_rate": 1.2667759665964118e-07, "loss": 1.1864, "step": 27489 }, { "epoch": 0.9844754418321485, "grad_norm": 1.2950546741485596, "learning_rate": 1.260946240494998e-07, "loss": 1.496, "step": 27490 }, { "epoch": 0.9845112539617168, "grad_norm": 1.5544168949127197, "learning_rate": 1.255129951139189e-07, "loss": 1.14, "step": 27491 }, { "epoch": 0.9845470660912852, "grad_norm": 1.3458187580108643, "learning_rate": 1.249327098607367e-07, "loss": 1.3636, "step": 27492 }, { "epoch": 0.9845828782208534, "grad_norm": 1.8006720542907715, "learning_rate": 1.2435376829775803e-07, "loss": 1.6198, "step": 27493 }, { "epoch": 0.9846186903504217, "grad_norm": 1.344710111618042, "learning_rate": 1.2377617043276556e-07, "loss": 1.2155, "step": 27494 }, { "epoch": 0.98465450247999, "grad_norm": 1.3209199905395508, "learning_rate": 1.231999162735309e-07, "loss": 1.2898, "step": 27495 }, { "epoch": 0.9846903146095582, "grad_norm": 1.2484784126281738, "learning_rate": 1.2262500582781445e-07, "loss": 1.2969, "step": 27496 }, { "epoch": 0.9847261267391265, "grad_norm": 1.6258513927459717, "learning_rate": 1.2205143910334338e-07, "loss": 1.3481, "step": 27497 }, { "epoch": 0.9847619388686948, "grad_norm": 2.1914570331573486, "learning_rate": 1.2147921610783374e-07, "loss": 1.7681, "step": 27498 }, { "epoch": 0.9847977509982632, "grad_norm": 1.3298579454421997, "learning_rate": 1.209083368490016e-07, "loss": 0.9676, "step": 27499 }, { "epoch": 0.9848335631278314, "grad_norm": 2.284987211227417, "learning_rate": 1.2033880133449638e-07, "loss": 1.3902, "step": 27500 }, { "epoch": 0.9848693752573997, "grad_norm": 2.084764003753662, "learning_rate": 1.1977060957200083e-07, "loss": 1.6581, "step": 27501 }, { "epoch": 0.984905187386968, "grad_norm": 1.331100344657898, "learning_rate": 1.1920376156916435e-07, "loss": 1.7012, "step": 27502 }, { "epoch": 0.9849409995165362, "grad_norm": 1.537713885307312, "learning_rate": 1.1863825733359201e-07, "loss": 1.6487, "step": 27503 }, { "epoch": 0.9849768116461045, "grad_norm": 1.940346360206604, "learning_rate": 1.1807409687291104e-07, "loss": 1.5181, "step": 27504 }, { "epoch": 0.9850126237756728, "grad_norm": 1.2605899572372437, "learning_rate": 1.1751128019470426e-07, "loss": 1.3682, "step": 27505 }, { "epoch": 0.9850484359052412, "grad_norm": 1.9707603454589844, "learning_rate": 1.1694980730654337e-07, "loss": 1.4205, "step": 27506 }, { "epoch": 0.9850842480348094, "grad_norm": 2.1587588787078857, "learning_rate": 1.163896782159779e-07, "loss": 1.4563, "step": 27507 }, { "epoch": 0.9851200601643777, "grad_norm": 2.022172451019287, "learning_rate": 1.1583089293055738e-07, "loss": 1.4855, "step": 27508 }, { "epoch": 0.985155872293946, "grad_norm": 1.5751782655715942, "learning_rate": 1.152734514577869e-07, "loss": 1.4265, "step": 27509 }, { "epoch": 0.9851916844235142, "grad_norm": 1.86588454246521, "learning_rate": 1.1471735380517156e-07, "loss": 1.5442, "step": 27510 }, { "epoch": 0.9852274965530825, "grad_norm": 1.3863067626953125, "learning_rate": 1.141625999801943e-07, "loss": 1.5659, "step": 27511 }, { "epoch": 0.9852633086826508, "grad_norm": 1.3101366758346558, "learning_rate": 1.1360918999030467e-07, "loss": 1.3462, "step": 27512 }, { "epoch": 0.9852991208122192, "grad_norm": 2.0936365127563477, "learning_rate": 1.1305712384297451e-07, "loss": 1.4808, "step": 27513 }, { "epoch": 0.9853349329417874, "grad_norm": 1.8865522146224976, "learning_rate": 1.1250640154560898e-07, "loss": 1.5132, "step": 27514 }, { "epoch": 0.9853707450713557, "grad_norm": 1.5039201974868774, "learning_rate": 1.1195702310561329e-07, "loss": 1.5329, "step": 27515 }, { "epoch": 0.985406557200924, "grad_norm": 1.6145402193069458, "learning_rate": 1.1140898853040372e-07, "loss": 1.1151, "step": 27516 }, { "epoch": 0.9854423693304922, "grad_norm": 1.550848126411438, "learning_rate": 1.1086229782734103e-07, "loss": 1.1668, "step": 27517 }, { "epoch": 0.9854781814600605, "grad_norm": 1.7086671590805054, "learning_rate": 1.1031695100376382e-07, "loss": 1.6588, "step": 27518 }, { "epoch": 0.9855139935896288, "grad_norm": 1.6773920059204102, "learning_rate": 1.0977294806703286e-07, "loss": 1.6271, "step": 27519 }, { "epoch": 0.9855498057191971, "grad_norm": 1.6794071197509766, "learning_rate": 1.0923028902446453e-07, "loss": 1.5406, "step": 27520 }, { "epoch": 0.9855856178487654, "grad_norm": 1.3817123174667358, "learning_rate": 1.0868897388334187e-07, "loss": 1.7427, "step": 27521 }, { "epoch": 0.9856214299783337, "grad_norm": 1.4821956157684326, "learning_rate": 1.0814900265095907e-07, "loss": 1.5651, "step": 27522 }, { "epoch": 0.985657242107902, "grad_norm": 1.8491703271865845, "learning_rate": 1.0761037533457696e-07, "loss": 1.5515, "step": 27523 }, { "epoch": 0.9856930542374702, "grad_norm": 2.7087044715881348, "learning_rate": 1.0707309194145643e-07, "loss": 1.3486, "step": 27524 }, { "epoch": 0.9857288663670385, "grad_norm": 1.8126064538955688, "learning_rate": 1.0653715247881391e-07, "loss": 1.4137, "step": 27525 }, { "epoch": 0.9857646784966068, "grad_norm": 2.0652072429656982, "learning_rate": 1.0600255695385475e-07, "loss": 1.2903, "step": 27526 }, { "epoch": 0.9858004906261751, "grad_norm": 1.4882497787475586, "learning_rate": 1.054693053737843e-07, "loss": 1.6498, "step": 27527 }, { "epoch": 0.9858363027557434, "grad_norm": 1.413586139678955, "learning_rate": 1.049373977457635e-07, "loss": 1.3082, "step": 27528 }, { "epoch": 0.9858721148853117, "grad_norm": 2.0597503185272217, "learning_rate": 1.0440683407695328e-07, "loss": 1.4598, "step": 27529 }, { "epoch": 0.98590792701488, "grad_norm": 1.3489118814468384, "learning_rate": 1.0387761437449239e-07, "loss": 1.4809, "step": 27530 }, { "epoch": 0.9859437391444482, "grad_norm": 1.6404932737350464, "learning_rate": 1.0334973864550845e-07, "loss": 1.1483, "step": 27531 }, { "epoch": 0.9859795512740165, "grad_norm": 1.574805736541748, "learning_rate": 1.0282320689708469e-07, "loss": 1.4865, "step": 27532 }, { "epoch": 0.9860153634035848, "grad_norm": 1.9894275665283203, "learning_rate": 1.0229801913632653e-07, "loss": 1.5721, "step": 27533 }, { "epoch": 0.986051175533153, "grad_norm": 1.813362717628479, "learning_rate": 1.0177417537028389e-07, "loss": 1.6005, "step": 27534 }, { "epoch": 0.9860869876627214, "grad_norm": 1.4725996255874634, "learning_rate": 1.0125167560601778e-07, "loss": 1.4278, "step": 27535 }, { "epoch": 0.9861227997922897, "grad_norm": 1.5005663633346558, "learning_rate": 1.0073051985054482e-07, "loss": 1.2329, "step": 27536 }, { "epoch": 0.9861586119218579, "grad_norm": 1.8271814584732056, "learning_rate": 1.0021070811088162e-07, "loss": 1.3513, "step": 27537 }, { "epoch": 0.9861944240514262, "grad_norm": 1.5297101736068726, "learning_rate": 9.969224039403369e-08, "loss": 1.5614, "step": 27538 }, { "epoch": 0.9862302361809945, "grad_norm": 1.3022998571395874, "learning_rate": 9.9175116706951e-08, "loss": 1.0406, "step": 27539 }, { "epoch": 0.9862660483105627, "grad_norm": 1.8046320676803589, "learning_rate": 9.86593370566058e-08, "loss": 1.3661, "step": 27540 }, { "epoch": 0.986301860440131, "grad_norm": 1.635322093963623, "learning_rate": 9.814490144993693e-08, "loss": 1.5341, "step": 27541 }, { "epoch": 0.9863376725696994, "grad_norm": 1.8201141357421875, "learning_rate": 9.763180989386112e-08, "loss": 1.3124, "step": 27542 }, { "epoch": 0.9863734846992677, "grad_norm": 1.5187281370162964, "learning_rate": 9.712006239529503e-08, "loss": 1.2908, "step": 27543 }, { "epoch": 0.9864092968288359, "grad_norm": 1.995166540145874, "learning_rate": 9.660965896111095e-08, "loss": 1.5077, "step": 27544 }, { "epoch": 0.9864451089584042, "grad_norm": 1.9507112503051758, "learning_rate": 9.610059959817008e-08, "loss": 1.1559, "step": 27545 }, { "epoch": 0.9864809210879725, "grad_norm": 1.3299881219863892, "learning_rate": 9.559288431333357e-08, "loss": 1.5751, "step": 27546 }, { "epoch": 0.9865167332175407, "grad_norm": 1.6809051036834717, "learning_rate": 9.50865131134182e-08, "loss": 1.2238, "step": 27547 }, { "epoch": 0.986552545347109, "grad_norm": 1.5759165287017822, "learning_rate": 9.458148600525185e-08, "loss": 1.2708, "step": 27548 }, { "epoch": 0.9865883574766774, "grad_norm": 1.5264739990234375, "learning_rate": 9.407780299562908e-08, "loss": 1.8756, "step": 27549 }, { "epoch": 0.9866241696062457, "grad_norm": 1.4205260276794434, "learning_rate": 9.357546409132223e-08, "loss": 1.5195, "step": 27550 }, { "epoch": 0.9866599817358139, "grad_norm": 1.4577481746673584, "learning_rate": 9.307446929908148e-08, "loss": 1.8085, "step": 27551 }, { "epoch": 0.9866957938653822, "grad_norm": 1.9873889684677124, "learning_rate": 9.257481862564587e-08, "loss": 1.3263, "step": 27552 }, { "epoch": 0.9867316059949505, "grad_norm": 1.4352340698242188, "learning_rate": 9.207651207775448e-08, "loss": 1.5041, "step": 27553 }, { "epoch": 0.9867674181245187, "grad_norm": 1.8972949981689453, "learning_rate": 9.157954966210192e-08, "loss": 1.5607, "step": 27554 }, { "epoch": 0.986803230254087, "grad_norm": 2.068582057952881, "learning_rate": 9.108393138536064e-08, "loss": 1.3331, "step": 27555 }, { "epoch": 0.9868390423836554, "grad_norm": 1.2161344289779663, "learning_rate": 9.05896572542253e-08, "loss": 1.1665, "step": 27556 }, { "epoch": 0.9868748545132237, "grad_norm": 2.187922239303589, "learning_rate": 9.009672727533503e-08, "loss": 1.7574, "step": 27557 }, { "epoch": 0.9869106666427919, "grad_norm": 1.6480915546417236, "learning_rate": 8.960514145530674e-08, "loss": 1.1512, "step": 27558 }, { "epoch": 0.9869464787723602, "grad_norm": 1.5355759859085083, "learning_rate": 8.911489980076848e-08, "loss": 1.5087, "step": 27559 }, { "epoch": 0.9869822909019285, "grad_norm": 2.0616066455841064, "learning_rate": 8.862600231832607e-08, "loss": 1.545, "step": 27560 }, { "epoch": 0.9870181030314967, "grad_norm": 1.3449019193649292, "learning_rate": 8.813844901452984e-08, "loss": 1.6202, "step": 27561 }, { "epoch": 0.987053915161065, "grad_norm": 1.6275995969772339, "learning_rate": 8.765223989596338e-08, "loss": 1.4442, "step": 27562 }, { "epoch": 0.9870897272906334, "grad_norm": 1.3992468118667603, "learning_rate": 8.716737496915483e-08, "loss": 1.2691, "step": 27563 }, { "epoch": 0.9871255394202016, "grad_norm": 1.3481507301330566, "learning_rate": 8.66838542406212e-08, "loss": 1.29, "step": 27564 }, { "epoch": 0.9871613515497699, "grad_norm": 1.4527891874313354, "learning_rate": 8.620167771689058e-08, "loss": 1.2298, "step": 27565 }, { "epoch": 0.9871971636793382, "grad_norm": 1.4806122779846191, "learning_rate": 8.572084540443558e-08, "loss": 1.4566, "step": 27566 }, { "epoch": 0.9872329758089065, "grad_norm": 1.745713472366333, "learning_rate": 8.524135730971772e-08, "loss": 1.4321, "step": 27567 }, { "epoch": 0.9872687879384747, "grad_norm": 1.633641242980957, "learning_rate": 8.476321343920957e-08, "loss": 1.3979, "step": 27568 }, { "epoch": 0.987304600068043, "grad_norm": 1.8932297229766846, "learning_rate": 8.428641379931713e-08, "loss": 1.4096, "step": 27569 }, { "epoch": 0.9873404121976114, "grad_norm": 1.2509734630584717, "learning_rate": 8.381095839647967e-08, "loss": 1.2951, "step": 27570 }, { "epoch": 0.9873762243271796, "grad_norm": 1.5911204814910889, "learning_rate": 8.333684723708102e-08, "loss": 1.2692, "step": 27571 }, { "epoch": 0.9874120364567479, "grad_norm": 1.4687579870224, "learning_rate": 8.286408032749382e-08, "loss": 1.694, "step": 27572 }, { "epoch": 0.9874478485863162, "grad_norm": 1.4162245988845825, "learning_rate": 8.239265767410187e-08, "loss": 1.3073, "step": 27573 }, { "epoch": 0.9874836607158844, "grad_norm": 1.4807811975479126, "learning_rate": 8.192257928322233e-08, "loss": 1.3811, "step": 27574 }, { "epoch": 0.9875194728454527, "grad_norm": 2.3424911499023438, "learning_rate": 8.145384516118349e-08, "loss": 1.4198, "step": 27575 }, { "epoch": 0.987555284975021, "grad_norm": 1.6639134883880615, "learning_rate": 8.098645531431359e-08, "loss": 1.3878, "step": 27576 }, { "epoch": 0.9875910971045894, "grad_norm": 1.885317087173462, "learning_rate": 8.052040974887432e-08, "loss": 1.669, "step": 27577 }, { "epoch": 0.9876269092341576, "grad_norm": 1.4320241212844849, "learning_rate": 8.005570847113841e-08, "loss": 1.1812, "step": 27578 }, { "epoch": 0.9876627213637259, "grad_norm": 1.843773603439331, "learning_rate": 7.959235148737865e-08, "loss": 1.4705, "step": 27579 }, { "epoch": 0.9876985334932942, "grad_norm": 1.7358323335647583, "learning_rate": 7.913033880381226e-08, "loss": 1.5676, "step": 27580 }, { "epoch": 0.9877343456228624, "grad_norm": 1.5030070543289185, "learning_rate": 7.866967042665651e-08, "loss": 1.2817, "step": 27581 }, { "epoch": 0.9877701577524307, "grad_norm": 2.4306750297546387, "learning_rate": 7.821034636211755e-08, "loss": 1.5123, "step": 27582 }, { "epoch": 0.987805969881999, "grad_norm": 2.3857903480529785, "learning_rate": 7.77523666163571e-08, "loss": 1.4726, "step": 27583 }, { "epoch": 0.9878417820115674, "grad_norm": 1.7818009853363037, "learning_rate": 7.729573119555911e-08, "loss": 1.5097, "step": 27584 }, { "epoch": 0.9878775941411356, "grad_norm": 1.879576325416565, "learning_rate": 7.684044010585201e-08, "loss": 1.3873, "step": 27585 }, { "epoch": 0.9879134062707039, "grad_norm": 1.4474228620529175, "learning_rate": 7.638649335336423e-08, "loss": 1.5264, "step": 27586 }, { "epoch": 0.9879492184002722, "grad_norm": 1.6005494594573975, "learning_rate": 7.593389094420201e-08, "loss": 1.217, "step": 27587 }, { "epoch": 0.9879850305298404, "grad_norm": 1.4687864780426025, "learning_rate": 7.548263288446045e-08, "loss": 1.5201, "step": 27588 }, { "epoch": 0.9880208426594087, "grad_norm": 1.9801404476165771, "learning_rate": 7.503271918020138e-08, "loss": 1.2031, "step": 27589 }, { "epoch": 0.988056654788977, "grad_norm": 2.481457233428955, "learning_rate": 7.458414983748663e-08, "loss": 1.7782, "step": 27590 }, { "epoch": 0.9880924669185454, "grad_norm": 1.6557172536849976, "learning_rate": 7.41369248623447e-08, "loss": 1.2114, "step": 27591 }, { "epoch": 0.9881282790481136, "grad_norm": 2.5019419193267822, "learning_rate": 7.369104426080409e-08, "loss": 1.3993, "step": 27592 }, { "epoch": 0.9881640911776819, "grad_norm": 1.443387508392334, "learning_rate": 7.324650803884891e-08, "loss": 1.6107, "step": 27593 }, { "epoch": 0.9881999033072502, "grad_norm": 1.4562711715698242, "learning_rate": 7.280331620246328e-08, "loss": 1.4164, "step": 27594 }, { "epoch": 0.9882357154368184, "grad_norm": 1.7285951375961304, "learning_rate": 7.236146875762017e-08, "loss": 1.7147, "step": 27595 }, { "epoch": 0.9882715275663867, "grad_norm": 1.6666792631149292, "learning_rate": 7.19209657102482e-08, "loss": 1.2269, "step": 27596 }, { "epoch": 0.988307339695955, "grad_norm": 1.4787660837173462, "learning_rate": 7.148180706628704e-08, "loss": 1.088, "step": 27597 }, { "epoch": 0.9883431518255233, "grad_norm": 1.591696858406067, "learning_rate": 7.104399283163199e-08, "loss": 1.429, "step": 27598 }, { "epoch": 0.9883789639550916, "grad_norm": 1.709412932395935, "learning_rate": 7.060752301218942e-08, "loss": 1.2527, "step": 27599 }, { "epoch": 0.9884147760846599, "grad_norm": 1.553296446800232, "learning_rate": 7.017239761381022e-08, "loss": 1.4105, "step": 27600 }, { "epoch": 0.9884505882142282, "grad_norm": 1.3799681663513184, "learning_rate": 6.973861664237857e-08, "loss": 1.441, "step": 27601 }, { "epoch": 0.9884864003437964, "grad_norm": 1.5649609565734863, "learning_rate": 6.930618010370094e-08, "loss": 1.4126, "step": 27602 }, { "epoch": 0.9885222124733647, "grad_norm": 1.5194275379180908, "learning_rate": 6.887508800361708e-08, "loss": 1.1752, "step": 27603 }, { "epoch": 0.988558024602933, "grad_norm": 1.4459376335144043, "learning_rate": 6.844534034791128e-08, "loss": 1.2249, "step": 27604 }, { "epoch": 0.9885938367325013, "grad_norm": 1.5532654523849487, "learning_rate": 6.801693714236779e-08, "loss": 1.541, "step": 27605 }, { "epoch": 0.9886296488620696, "grad_norm": 1.2949588298797607, "learning_rate": 6.758987839275976e-08, "loss": 1.4817, "step": 27606 }, { "epoch": 0.9886654609916379, "grad_norm": 1.7721437215805054, "learning_rate": 6.716416410481596e-08, "loss": 1.6125, "step": 27607 }, { "epoch": 0.9887012731212061, "grad_norm": 1.6902053356170654, "learning_rate": 6.673979428428733e-08, "loss": 1.4175, "step": 27608 }, { "epoch": 0.9887370852507744, "grad_norm": 1.6286303997039795, "learning_rate": 6.631676893685823e-08, "loss": 1.6024, "step": 27609 }, { "epoch": 0.9887728973803427, "grad_norm": 1.3310688734054565, "learning_rate": 6.589508806823518e-08, "loss": 1.2111, "step": 27610 }, { "epoch": 0.988808709509911, "grad_norm": 2.158609628677368, "learning_rate": 6.547475168409145e-08, "loss": 1.1975, "step": 27611 }, { "epoch": 0.9888445216394793, "grad_norm": 1.4116407632827759, "learning_rate": 6.505575979007805e-08, "loss": 1.2926, "step": 27612 }, { "epoch": 0.9888803337690476, "grad_norm": 1.7177308797836304, "learning_rate": 6.463811239183492e-08, "loss": 1.272, "step": 27613 }, { "epoch": 0.9889161458986159, "grad_norm": 1.2889899015426636, "learning_rate": 6.42218094949798e-08, "loss": 1.3474, "step": 27614 }, { "epoch": 0.9889519580281841, "grad_norm": 2.355888605117798, "learning_rate": 6.38068511051082e-08, "loss": 1.6049, "step": 27615 }, { "epoch": 0.9889877701577524, "grad_norm": 1.743425726890564, "learning_rate": 6.339323722780455e-08, "loss": 1.738, "step": 27616 }, { "epoch": 0.9890235822873207, "grad_norm": 1.74456787109375, "learning_rate": 6.298096786864216e-08, "loss": 1.4269, "step": 27617 }, { "epoch": 0.989059394416889, "grad_norm": 1.743873119354248, "learning_rate": 6.257004303316106e-08, "loss": 1.6638, "step": 27618 }, { "epoch": 0.9890952065464573, "grad_norm": 2.2863898277282715, "learning_rate": 6.216046272687904e-08, "loss": 1.3663, "step": 27619 }, { "epoch": 0.9891310186760256, "grad_norm": 2.197343111038208, "learning_rate": 6.1752226955325e-08, "loss": 1.6005, "step": 27620 }, { "epoch": 0.9891668308055939, "grad_norm": 1.486518144607544, "learning_rate": 6.134533572398349e-08, "loss": 1.5046, "step": 27621 }, { "epoch": 0.9892026429351621, "grad_norm": 1.5760600566864014, "learning_rate": 6.093978903833897e-08, "loss": 1.3962, "step": 27622 }, { "epoch": 0.9892384550647304, "grad_norm": 1.749154806137085, "learning_rate": 6.053558690382045e-08, "loss": 1.4331, "step": 27623 }, { "epoch": 0.9892742671942987, "grad_norm": 1.6556254625320435, "learning_rate": 6.013272932590131e-08, "loss": 1.5677, "step": 27624 }, { "epoch": 0.9893100793238669, "grad_norm": 1.8295544385910034, "learning_rate": 5.973121630996615e-08, "loss": 1.1101, "step": 27625 }, { "epoch": 0.9893458914534353, "grad_norm": 1.4694961309432983, "learning_rate": 5.9331047861443944e-08, "loss": 1.3744, "step": 27626 }, { "epoch": 0.9893817035830036, "grad_norm": 1.7429505586624146, "learning_rate": 5.893222398569709e-08, "loss": 1.4978, "step": 27627 }, { "epoch": 0.9894175157125719, "grad_norm": 1.6086716651916504, "learning_rate": 5.8534744688110156e-08, "loss": 1.007, "step": 27628 }, { "epoch": 0.9894533278421401, "grad_norm": 1.5418668985366821, "learning_rate": 5.8138609974023316e-08, "loss": 1.2586, "step": 27629 }, { "epoch": 0.9894891399717084, "grad_norm": 1.3214435577392578, "learning_rate": 5.774381984876565e-08, "loss": 1.4815, "step": 27630 }, { "epoch": 0.9895249521012767, "grad_norm": 1.4706875085830688, "learning_rate": 5.735037431765511e-08, "loss": 1.1197, "step": 27631 }, { "epoch": 0.9895607642308449, "grad_norm": 1.5423040390014648, "learning_rate": 5.6958273385965264e-08, "loss": 1.1352, "step": 27632 }, { "epoch": 0.9895965763604133, "grad_norm": 1.6856582164764404, "learning_rate": 5.656751705899188e-08, "loss": 1.5453, "step": 27633 }, { "epoch": 0.9896323884899816, "grad_norm": 1.7213730812072754, "learning_rate": 5.617810534198631e-08, "loss": 1.136, "step": 27634 }, { "epoch": 0.9896682006195499, "grad_norm": 2.0866880416870117, "learning_rate": 5.57900382401777e-08, "loss": 1.1906, "step": 27635 }, { "epoch": 0.9897040127491181, "grad_norm": 2.5806922912597656, "learning_rate": 5.540331575880631e-08, "loss": 1.7527, "step": 27636 }, { "epoch": 0.9897398248786864, "grad_norm": 1.7330875396728516, "learning_rate": 5.501793790305687e-08, "loss": 1.3741, "step": 27637 }, { "epoch": 0.9897756370082547, "grad_norm": 1.4673677682876587, "learning_rate": 5.4633904678125234e-08, "loss": 1.5148, "step": 27638 }, { "epoch": 0.9898114491378229, "grad_norm": 1.8655990362167358, "learning_rate": 5.425121608917394e-08, "loss": 1.6041, "step": 27639 }, { "epoch": 0.9898472612673913, "grad_norm": 1.5611985921859741, "learning_rate": 5.3869872141343313e-08, "loss": 1.3083, "step": 27640 }, { "epoch": 0.9898830733969596, "grad_norm": 1.6236423254013062, "learning_rate": 5.348987283978479e-08, "loss": 1.3885, "step": 27641 }, { "epoch": 0.9899188855265278, "grad_norm": 1.4711804389953613, "learning_rate": 5.3111218189594304e-08, "loss": 1.2077, "step": 27642 }, { "epoch": 0.9899546976560961, "grad_norm": 1.3890304565429688, "learning_rate": 5.2733908195867764e-08, "loss": 1.526, "step": 27643 }, { "epoch": 0.9899905097856644, "grad_norm": 1.4793809652328491, "learning_rate": 5.23579428636789e-08, "loss": 1.4713, "step": 27644 }, { "epoch": 0.9900263219152327, "grad_norm": 1.3004274368286133, "learning_rate": 5.1983322198101425e-08, "loss": 1.5026, "step": 27645 }, { "epoch": 0.9900621340448009, "grad_norm": 2.2040960788726807, "learning_rate": 5.161004620416465e-08, "loss": 1.1282, "step": 27646 }, { "epoch": 0.9900979461743693, "grad_norm": 1.7686691284179688, "learning_rate": 5.1238114886875685e-08, "loss": 1.3961, "step": 27647 }, { "epoch": 0.9901337583039376, "grad_norm": 1.619179606437683, "learning_rate": 5.086752825126384e-08, "loss": 1.5506, "step": 27648 }, { "epoch": 0.9901695704335058, "grad_norm": 1.7559400796890259, "learning_rate": 5.049828630230291e-08, "loss": 1.3118, "step": 27649 }, { "epoch": 0.9902053825630741, "grad_norm": 1.9877136945724487, "learning_rate": 5.01303890449667e-08, "loss": 1.5718, "step": 27650 }, { "epoch": 0.9902411946926424, "grad_norm": 1.8758115768432617, "learning_rate": 4.976383648419569e-08, "loss": 1.306, "step": 27651 }, { "epoch": 0.9902770068222106, "grad_norm": 2.0945935249328613, "learning_rate": 4.9398628624930385e-08, "loss": 1.5006, "step": 27652 }, { "epoch": 0.9903128189517789, "grad_norm": 1.786147117614746, "learning_rate": 4.903476547206687e-08, "loss": 1.273, "step": 27653 }, { "epoch": 0.9903486310813473, "grad_norm": 1.7926175594329834, "learning_rate": 4.8672247030523425e-08, "loss": 1.3818, "step": 27654 }, { "epoch": 0.9903844432109156, "grad_norm": 2.316483497619629, "learning_rate": 4.8311073305162825e-08, "loss": 1.248, "step": 27655 }, { "epoch": 0.9904202553404838, "grad_norm": 2.2332239151000977, "learning_rate": 4.795124430085896e-08, "loss": 1.1146, "step": 27656 }, { "epoch": 0.9904560674700521, "grad_norm": 1.80547297000885, "learning_rate": 4.7592760022430185e-08, "loss": 1.372, "step": 27657 }, { "epoch": 0.9904918795996204, "grad_norm": 1.2292410135269165, "learning_rate": 4.723562047471708e-08, "loss": 1.6469, "step": 27658 }, { "epoch": 0.9905276917291886, "grad_norm": 1.5700558423995972, "learning_rate": 4.687982566251581e-08, "loss": 1.3877, "step": 27659 }, { "epoch": 0.9905635038587569, "grad_norm": 1.5725685358047485, "learning_rate": 4.652537559062253e-08, "loss": 1.337, "step": 27660 }, { "epoch": 0.9905993159883253, "grad_norm": 1.444566249847412, "learning_rate": 4.617227026378901e-08, "loss": 1.2462, "step": 27661 }, { "epoch": 0.9906351281178936, "grad_norm": 1.5620014667510986, "learning_rate": 4.582050968677809e-08, "loss": 1.3293, "step": 27662 }, { "epoch": 0.9906709402474618, "grad_norm": 1.445263147354126, "learning_rate": 4.5470093864330435e-08, "loss": 1.4857, "step": 27663 }, { "epoch": 0.9907067523770301, "grad_norm": 2.2626137733459473, "learning_rate": 4.5121022801142275e-08, "loss": 1.2967, "step": 27664 }, { "epoch": 0.9907425645065984, "grad_norm": 1.5922274589538574, "learning_rate": 4.477329650192097e-08, "loss": 1.2494, "step": 27665 }, { "epoch": 0.9907783766361666, "grad_norm": 1.4853841066360474, "learning_rate": 4.442691497134055e-08, "loss": 1.2511, "step": 27666 }, { "epoch": 0.9908141887657349, "grad_norm": 1.8890957832336426, "learning_rate": 4.408187821406395e-08, "loss": 1.6964, "step": 27667 }, { "epoch": 0.9908500008953033, "grad_norm": 1.2731983661651611, "learning_rate": 4.373818623473191e-08, "loss": 1.3958, "step": 27668 }, { "epoch": 0.9908858130248716, "grad_norm": 1.4172587394714355, "learning_rate": 4.3395839037962956e-08, "loss": 1.3414, "step": 27669 }, { "epoch": 0.9909216251544398, "grad_norm": 1.5718883275985718, "learning_rate": 4.305483662837562e-08, "loss": 1.5334, "step": 27670 }, { "epoch": 0.9909574372840081, "grad_norm": 1.6942057609558105, "learning_rate": 4.2715179010555106e-08, "loss": 1.4271, "step": 27671 }, { "epoch": 0.9909932494135764, "grad_norm": 1.4615954160690308, "learning_rate": 4.2376866189053346e-08, "loss": 1.4432, "step": 27672 }, { "epoch": 0.9910290615431446, "grad_norm": 1.955811858177185, "learning_rate": 4.2039898168444445e-08, "loss": 1.4947, "step": 27673 }, { "epoch": 0.9910648736727129, "grad_norm": 1.6968096494674683, "learning_rate": 4.170427495324702e-08, "loss": 1.558, "step": 27674 }, { "epoch": 0.9911006858022813, "grad_norm": 1.3976668119430542, "learning_rate": 4.1369996547979685e-08, "loss": 1.142, "step": 27675 }, { "epoch": 0.9911364979318495, "grad_norm": 2.15129017829895, "learning_rate": 4.1037062957138825e-08, "loss": 1.7194, "step": 27676 }, { "epoch": 0.9911723100614178, "grad_norm": 1.6736570596694946, "learning_rate": 4.070547418522086e-08, "loss": 1.307, "step": 27677 }, { "epoch": 0.9912081221909861, "grad_norm": 1.7805536985397339, "learning_rate": 4.037523023666667e-08, "loss": 1.5194, "step": 27678 }, { "epoch": 0.9912439343205544, "grad_norm": 1.7508223056793213, "learning_rate": 4.0046331115917157e-08, "loss": 1.5008, "step": 27679 }, { "epoch": 0.9912797464501226, "grad_norm": 1.3880499601364136, "learning_rate": 3.9718776827413204e-08, "loss": 1.5748, "step": 27680 }, { "epoch": 0.9913155585796909, "grad_norm": 2.6532273292541504, "learning_rate": 3.9392567375551306e-08, "loss": 1.1765, "step": 27681 }, { "epoch": 0.9913513707092593, "grad_norm": 1.741824984550476, "learning_rate": 3.906770276471683e-08, "loss": 1.1866, "step": 27682 }, { "epoch": 0.9913871828388275, "grad_norm": 1.3869082927703857, "learning_rate": 3.8744182999295164e-08, "loss": 1.4337, "step": 27683 }, { "epoch": 0.9914229949683958, "grad_norm": 2.356285333633423, "learning_rate": 3.842200808362728e-08, "loss": 1.7948, "step": 27684 }, { "epoch": 0.9914588070979641, "grad_norm": 1.4748954772949219, "learning_rate": 3.810117802204305e-08, "loss": 1.323, "step": 27685 }, { "epoch": 0.9914946192275323, "grad_norm": 1.8216158151626587, "learning_rate": 3.778169281887234e-08, "loss": 1.4063, "step": 27686 }, { "epoch": 0.9915304313571006, "grad_norm": 1.3165158033370972, "learning_rate": 3.746355247841171e-08, "loss": 1.1767, "step": 27687 }, { "epoch": 0.9915662434866689, "grad_norm": 2.005211114883423, "learning_rate": 3.7146757004924425e-08, "loss": 1.1474, "step": 27688 }, { "epoch": 0.9916020556162373, "grad_norm": 1.54912531375885, "learning_rate": 3.683130640269594e-08, "loss": 1.4143, "step": 27689 }, { "epoch": 0.9916378677458055, "grad_norm": 1.9291484355926514, "learning_rate": 3.651720067595621e-08, "loss": 1.4354, "step": 27690 }, { "epoch": 0.9916736798753738, "grad_norm": 1.4543896913528442, "learning_rate": 3.620443982892407e-08, "loss": 1.5983, "step": 27691 }, { "epoch": 0.9917094920049421, "grad_norm": 2.140312910079956, "learning_rate": 3.589302386582949e-08, "loss": 1.352, "step": 27692 }, { "epoch": 0.9917453041345103, "grad_norm": 1.8118711709976196, "learning_rate": 3.558295279084689e-08, "loss": 1.4931, "step": 27693 }, { "epoch": 0.9917811162640786, "grad_norm": 1.5152671337127686, "learning_rate": 3.527422660815072e-08, "loss": 1.3262, "step": 27694 }, { "epoch": 0.9918169283936469, "grad_norm": 1.843450903892517, "learning_rate": 3.4966845321893204e-08, "loss": 1.3335, "step": 27695 }, { "epoch": 0.9918527405232153, "grad_norm": 2.2369720935821533, "learning_rate": 3.4660808936215485e-08, "loss": 1.5118, "step": 27696 }, { "epoch": 0.9918885526527835, "grad_norm": 1.4145985841751099, "learning_rate": 3.435611745522538e-08, "loss": 1.5992, "step": 27697 }, { "epoch": 0.9919243647823518, "grad_norm": 1.7589112520217896, "learning_rate": 3.405277088301961e-08, "loss": 1.3502, "step": 27698 }, { "epoch": 0.9919601769119201, "grad_norm": 1.60627281665802, "learning_rate": 3.375076922370601e-08, "loss": 1.2305, "step": 27699 }, { "epoch": 0.9919959890414883, "grad_norm": 1.4741889238357544, "learning_rate": 3.345011248131469e-08, "loss": 1.3674, "step": 27700 }, { "epoch": 0.9920318011710566, "grad_norm": 1.848053216934204, "learning_rate": 3.3150800659909055e-08, "loss": 1.4557, "step": 27701 }, { "epoch": 0.9920676133006249, "grad_norm": 1.6344389915466309, "learning_rate": 3.285283376350812e-08, "loss": 1.3267, "step": 27702 }, { "epoch": 0.9921034254301933, "grad_norm": 1.2263892889022827, "learning_rate": 3.255621179613089e-08, "loss": 1.3394, "step": 27703 }, { "epoch": 0.9921392375597615, "grad_norm": 1.8323966264724731, "learning_rate": 3.226093476175196e-08, "loss": 1.4297, "step": 27704 }, { "epoch": 0.9921750496893298, "grad_norm": 1.2270833253860474, "learning_rate": 3.1967002664357036e-08, "loss": 1.1901, "step": 27705 }, { "epoch": 0.9922108618188981, "grad_norm": 1.5533965826034546, "learning_rate": 3.167441550789851e-08, "loss": 1.455, "step": 27706 }, { "epoch": 0.9922466739484663, "grad_norm": 1.9076077938079834, "learning_rate": 3.138317329630658e-08, "loss": 1.3796, "step": 27707 }, { "epoch": 0.9922824860780346, "grad_norm": 1.721117377281189, "learning_rate": 3.109327603351142e-08, "loss": 1.2874, "step": 27708 }, { "epoch": 0.9923182982076029, "grad_norm": 1.3826229572296143, "learning_rate": 3.080472372339882e-08, "loss": 1.524, "step": 27709 }, { "epoch": 0.9923541103371712, "grad_norm": 1.7644851207733154, "learning_rate": 3.0517516369865665e-08, "loss": 1.3418, "step": 27710 }, { "epoch": 0.9923899224667395, "grad_norm": 1.6510146856307983, "learning_rate": 3.0231653976764415e-08, "loss": 1.2241, "step": 27711 }, { "epoch": 0.9924257345963078, "grad_norm": 1.384408950805664, "learning_rate": 2.994713654793646e-08, "loss": 1.5097, "step": 27712 }, { "epoch": 0.992461546725876, "grad_norm": 1.7258027791976929, "learning_rate": 2.966396408722316e-08, "loss": 1.3808, "step": 27713 }, { "epoch": 0.9924973588554443, "grad_norm": 2.7897722721099854, "learning_rate": 2.9382136598432587e-08, "loss": 0.9909, "step": 27714 }, { "epoch": 0.9925331709850126, "grad_norm": 1.8314100503921509, "learning_rate": 2.9101654085350594e-08, "loss": 1.4235, "step": 27715 }, { "epoch": 0.9925689831145809, "grad_norm": 1.441307783126831, "learning_rate": 2.8822516551751942e-08, "loss": 1.1951, "step": 27716 }, { "epoch": 0.9926047952441492, "grad_norm": 1.8378783464431763, "learning_rate": 2.854472400138919e-08, "loss": 1.2973, "step": 27717 }, { "epoch": 0.9926406073737175, "grad_norm": 1.8195862770080566, "learning_rate": 2.8268276438003782e-08, "loss": 1.4349, "step": 27718 }, { "epoch": 0.9926764195032858, "grad_norm": 1.6150768995285034, "learning_rate": 2.799317386531497e-08, "loss": 1.547, "step": 27719 }, { "epoch": 0.992712231632854, "grad_norm": 1.6209663152694702, "learning_rate": 2.7719416287030897e-08, "loss": 1.4109, "step": 27720 }, { "epoch": 0.9927480437624223, "grad_norm": 1.6728167533874512, "learning_rate": 2.74470037068264e-08, "loss": 1.4065, "step": 27721 }, { "epoch": 0.9927838558919906, "grad_norm": 1.3521865606307983, "learning_rate": 2.717593612835412e-08, "loss": 1.4566, "step": 27722 }, { "epoch": 0.9928196680215589, "grad_norm": 1.5996589660644531, "learning_rate": 2.6906213555288884e-08, "loss": 1.4181, "step": 27723 }, { "epoch": 0.9928554801511272, "grad_norm": 1.476045846939087, "learning_rate": 2.6637835991238924e-08, "loss": 1.2518, "step": 27724 }, { "epoch": 0.9928912922806955, "grad_norm": 2.2209970951080322, "learning_rate": 2.6370803439812463e-08, "loss": 1.0453, "step": 27725 }, { "epoch": 0.9929271044102638, "grad_norm": 1.3425819873809814, "learning_rate": 2.6105115904617726e-08, "loss": 1.5999, "step": 27726 }, { "epoch": 0.992962916539832, "grad_norm": 1.6703921556472778, "learning_rate": 2.584077338921853e-08, "loss": 1.3171, "step": 27727 }, { "epoch": 0.9929987286694003, "grad_norm": 1.6346490383148193, "learning_rate": 2.557777589717869e-08, "loss": 1.4642, "step": 27728 }, { "epoch": 0.9930345407989686, "grad_norm": 1.7949005365371704, "learning_rate": 2.5316123432028714e-08, "loss": 1.6685, "step": 27729 }, { "epoch": 0.9930703529285368, "grad_norm": 1.5301461219787598, "learning_rate": 2.5055815997299113e-08, "loss": 1.7001, "step": 27730 }, { "epoch": 0.9931061650581052, "grad_norm": 1.6735132932662964, "learning_rate": 2.479685359647599e-08, "loss": 1.4314, "step": 27731 }, { "epoch": 0.9931419771876735, "grad_norm": 1.8682321310043335, "learning_rate": 2.453923623305654e-08, "loss": 1.4695, "step": 27732 }, { "epoch": 0.9931777893172418, "grad_norm": 1.7532639503479004, "learning_rate": 2.4282963910504664e-08, "loss": 1.6481, "step": 27733 }, { "epoch": 0.99321360144681, "grad_norm": 1.6439052820205688, "learning_rate": 2.4028036632262053e-08, "loss": 1.3373, "step": 27734 }, { "epoch": 0.9932494135763783, "grad_norm": 2.574580192565918, "learning_rate": 2.3774454401770396e-08, "loss": 1.5889, "step": 27735 }, { "epoch": 0.9932852257059466, "grad_norm": 1.586375117301941, "learning_rate": 2.3522217222426978e-08, "loss": 1.4725, "step": 27736 }, { "epoch": 0.9933210378355148, "grad_norm": 1.4359043836593628, "learning_rate": 2.3271325097629082e-08, "loss": 1.3233, "step": 27737 }, { "epoch": 0.9933568499650832, "grad_norm": 1.3824795484542847, "learning_rate": 2.3021778030751784e-08, "loss": 1.4131, "step": 27738 }, { "epoch": 0.9933926620946515, "grad_norm": 1.66568124294281, "learning_rate": 2.2773576025170163e-08, "loss": 1.3049, "step": 27739 }, { "epoch": 0.9934284742242198, "grad_norm": 1.4649953842163086, "learning_rate": 2.2526719084192683e-08, "loss": 1.4202, "step": 27740 }, { "epoch": 0.993464286353788, "grad_norm": 1.3860225677490234, "learning_rate": 2.2281207211172218e-08, "loss": 1.4507, "step": 27741 }, { "epoch": 0.9935000984833563, "grad_norm": 1.6990655660629272, "learning_rate": 2.2037040409383924e-08, "loss": 1.3812, "step": 27742 }, { "epoch": 0.9935359106129246, "grad_norm": 1.6962313652038574, "learning_rate": 2.1794218682125168e-08, "loss": 1.4086, "step": 27743 }, { "epoch": 0.9935717227424928, "grad_norm": 1.4338451623916626, "learning_rate": 2.15527420326711e-08, "loss": 1.5719, "step": 27744 }, { "epoch": 0.9936075348720612, "grad_norm": 1.4438395500183105, "learning_rate": 2.131261046425248e-08, "loss": 1.2907, "step": 27745 }, { "epoch": 0.9936433470016295, "grad_norm": 1.768540620803833, "learning_rate": 2.107382398011115e-08, "loss": 1.33, "step": 27746 }, { "epoch": 0.9936791591311978, "grad_norm": 1.6516530513763428, "learning_rate": 2.0836382583466762e-08, "loss": 1.4927, "step": 27747 }, { "epoch": 0.993714971260766, "grad_norm": 1.5076415538787842, "learning_rate": 2.0600286277494552e-08, "loss": 1.2049, "step": 27748 }, { "epoch": 0.9937507833903343, "grad_norm": 1.4209827184677124, "learning_rate": 2.0365535065391962e-08, "loss": 1.297, "step": 27749 }, { "epoch": 0.9937865955199026, "grad_norm": 1.4648021459579468, "learning_rate": 2.013212895030092e-08, "loss": 1.286, "step": 27750 }, { "epoch": 0.9938224076494708, "grad_norm": 2.422774314880371, "learning_rate": 1.9900067935363364e-08, "loss": 1.6095, "step": 27751 }, { "epoch": 0.9938582197790392, "grad_norm": 1.4855307340621948, "learning_rate": 1.966935202371012e-08, "loss": 1.5097, "step": 27752 }, { "epoch": 0.9938940319086075, "grad_norm": 2.235036849975586, "learning_rate": 1.9439981218438708e-08, "loss": 1.4405, "step": 27753 }, { "epoch": 0.9939298440381757, "grad_norm": 1.6031259298324585, "learning_rate": 1.921195552263555e-08, "loss": 1.4121, "step": 27754 }, { "epoch": 0.993965656167744, "grad_norm": 1.9243065118789673, "learning_rate": 1.8985274939375962e-08, "loss": 1.5876, "step": 27755 }, { "epoch": 0.9940014682973123, "grad_norm": 1.6919931173324585, "learning_rate": 1.8759939471690858e-08, "loss": 1.453, "step": 27756 }, { "epoch": 0.9940372804268806, "grad_norm": 1.5720360279083252, "learning_rate": 1.8535949122633346e-08, "loss": 1.4789, "step": 27757 }, { "epoch": 0.9940730925564488, "grad_norm": 1.6576461791992188, "learning_rate": 1.831330389521213e-08, "loss": 1.417, "step": 27758 }, { "epoch": 0.9941089046860172, "grad_norm": 1.631563425064087, "learning_rate": 1.8092003792413714e-08, "loss": 1.3115, "step": 27759 }, { "epoch": 0.9941447168155855, "grad_norm": 1.270306944847107, "learning_rate": 1.7872048817213495e-08, "loss": 1.5715, "step": 27760 }, { "epoch": 0.9941805289451537, "grad_norm": 1.9433504343032837, "learning_rate": 1.7653438972586868e-08, "loss": 1.4304, "step": 27761 }, { "epoch": 0.994216341074722, "grad_norm": 2.1544501781463623, "learning_rate": 1.743617426145372e-08, "loss": 1.4941, "step": 27762 }, { "epoch": 0.9942521532042903, "grad_norm": 1.2280709743499756, "learning_rate": 1.7220254686756142e-08, "loss": 1.0029, "step": 27763 }, { "epoch": 0.9942879653338585, "grad_norm": 1.3527542352676392, "learning_rate": 1.700568025139182e-08, "loss": 1.4559, "step": 27764 }, { "epoch": 0.9943237774634268, "grad_norm": 1.9199059009552002, "learning_rate": 1.679245095824733e-08, "loss": 1.3947, "step": 27765 }, { "epoch": 0.9943595895929952, "grad_norm": 2.5638864040374756, "learning_rate": 1.658056681019815e-08, "loss": 1.5441, "step": 27766 }, { "epoch": 0.9943954017225635, "grad_norm": 1.6788527965545654, "learning_rate": 1.637002781007535e-08, "loss": 1.3188, "step": 27767 }, { "epoch": 0.9944312138521317, "grad_norm": 1.2428195476531982, "learning_rate": 1.6160833960732203e-08, "loss": 1.4178, "step": 27768 }, { "epoch": 0.9944670259817, "grad_norm": 1.7041432857513428, "learning_rate": 1.595298526496647e-08, "loss": 1.5002, "step": 27769 }, { "epoch": 0.9945028381112683, "grad_norm": 1.5376256704330444, "learning_rate": 1.5746481725598117e-08, "loss": 1.6361, "step": 27770 }, { "epoch": 0.9945386502408365, "grad_norm": 1.9216423034667969, "learning_rate": 1.5541323345380497e-08, "loss": 1.2145, "step": 27771 }, { "epoch": 0.9945744623704048, "grad_norm": 1.9160414934158325, "learning_rate": 1.533751012707807e-08, "loss": 1.5186, "step": 27772 }, { "epoch": 0.9946102744999732, "grad_norm": 1.4614883661270142, "learning_rate": 1.5135042073444182e-08, "loss": 1.6713, "step": 27773 }, { "epoch": 0.9946460866295415, "grad_norm": 1.514203667640686, "learning_rate": 1.4933919187198884e-08, "loss": 1.041, "step": 27774 }, { "epoch": 0.9946818987591097, "grad_norm": 1.3184031248092651, "learning_rate": 1.4734141471051122e-08, "loss": 1.3827, "step": 27775 }, { "epoch": 0.994717710888678, "grad_norm": 1.7681211233139038, "learning_rate": 1.4535708927676529e-08, "loss": 1.5363, "step": 27776 }, { "epoch": 0.9947535230182463, "grad_norm": 1.4674324989318848, "learning_rate": 1.4338621559750742e-08, "loss": 1.5941, "step": 27777 }, { "epoch": 0.9947893351478145, "grad_norm": 1.7696936130523682, "learning_rate": 1.4142879369927198e-08, "loss": 1.4344, "step": 27778 }, { "epoch": 0.9948251472773828, "grad_norm": 1.9830656051635742, "learning_rate": 1.3948482360848225e-08, "loss": 1.5663, "step": 27779 }, { "epoch": 0.9948609594069512, "grad_norm": 1.470342993736267, "learning_rate": 1.3755430535111747e-08, "loss": 1.6629, "step": 27780 }, { "epoch": 0.9948967715365195, "grad_norm": 1.7345906496047974, "learning_rate": 1.3563723895326785e-08, "loss": 1.4955, "step": 27781 }, { "epoch": 0.9949325836660877, "grad_norm": 1.6475701332092285, "learning_rate": 1.3373362444057957e-08, "loss": 1.443, "step": 27782 }, { "epoch": 0.994968395795656, "grad_norm": 1.7012711763381958, "learning_rate": 1.3184346183892082e-08, "loss": 1.341, "step": 27783 }, { "epoch": 0.9950042079252243, "grad_norm": 1.4471287727355957, "learning_rate": 1.2996675117349367e-08, "loss": 1.3683, "step": 27784 }, { "epoch": 0.9950400200547925, "grad_norm": 2.2064197063446045, "learning_rate": 1.2810349246961117e-08, "loss": 1.3698, "step": 27785 }, { "epoch": 0.9950758321843608, "grad_norm": 2.3040659427642822, "learning_rate": 1.262536857523644e-08, "loss": 1.364, "step": 27786 }, { "epoch": 0.9951116443139292, "grad_norm": 1.5879874229431152, "learning_rate": 1.2441733104662234e-08, "loss": 1.3092, "step": 27787 }, { "epoch": 0.9951474564434974, "grad_norm": 1.4612466096878052, "learning_rate": 1.2259442837714297e-08, "loss": 1.4385, "step": 27788 }, { "epoch": 0.9951832685730657, "grad_norm": 2.061544418334961, "learning_rate": 1.2078497776835119e-08, "loss": 1.4193, "step": 27789 }, { "epoch": 0.995219080702634, "grad_norm": 1.4527156352996826, "learning_rate": 1.1898897924467189e-08, "loss": 1.4357, "step": 27790 }, { "epoch": 0.9952548928322023, "grad_norm": 1.664737343788147, "learning_rate": 1.1720643283019694e-08, "loss": 1.6887, "step": 27791 }, { "epoch": 0.9952907049617705, "grad_norm": 1.4561351537704468, "learning_rate": 1.1543733854901817e-08, "loss": 1.2292, "step": 27792 }, { "epoch": 0.9953265170913388, "grad_norm": 1.2238926887512207, "learning_rate": 1.1368169642489435e-08, "loss": 1.2025, "step": 27793 }, { "epoch": 0.9953623292209072, "grad_norm": 1.3671455383300781, "learning_rate": 1.119395064813622e-08, "loss": 1.3286, "step": 27794 }, { "epoch": 0.9953981413504754, "grad_norm": 1.3853384256362915, "learning_rate": 1.1021076874195846e-08, "loss": 1.3408, "step": 27795 }, { "epoch": 0.9954339534800437, "grad_norm": 1.7308851480484009, "learning_rate": 1.0849548322988679e-08, "loss": 1.3314, "step": 27796 }, { "epoch": 0.995469765609612, "grad_norm": 1.6281728744506836, "learning_rate": 1.0679364996823982e-08, "loss": 1.4538, "step": 27797 }, { "epoch": 0.9955055777391802, "grad_norm": 1.8174667358398438, "learning_rate": 1.0510526897988815e-08, "loss": 1.2637, "step": 27798 }, { "epoch": 0.9955413898687485, "grad_norm": 2.2247347831726074, "learning_rate": 1.0343034028759136e-08, "loss": 1.6667, "step": 27799 }, { "epoch": 0.9955772019983168, "grad_norm": 1.9621455669403076, "learning_rate": 1.0176886391388695e-08, "loss": 1.5749, "step": 27800 }, { "epoch": 0.9956130141278852, "grad_norm": 1.6727039813995361, "learning_rate": 1.0012083988109045e-08, "loss": 1.2988, "step": 27801 }, { "epoch": 0.9956488262574534, "grad_norm": 1.5761189460754395, "learning_rate": 9.848626821140627e-09, "loss": 1.5141, "step": 27802 }, { "epoch": 0.9956846383870217, "grad_norm": 1.3723686933517456, "learning_rate": 9.686514892681687e-09, "loss": 1.2416, "step": 27803 }, { "epoch": 0.99572045051659, "grad_norm": 1.3970915079116821, "learning_rate": 9.525748204908258e-09, "loss": 1.2696, "step": 27804 }, { "epoch": 0.9957562626461582, "grad_norm": 1.6122543811798096, "learning_rate": 9.366326759985278e-09, "loss": 1.5499, "step": 27805 }, { "epoch": 0.9957920747757265, "grad_norm": 1.3238275051116943, "learning_rate": 9.208250560066578e-09, "loss": 1.0434, "step": 27806 }, { "epoch": 0.9958278869052948, "grad_norm": 1.784379243850708, "learning_rate": 9.051519607272684e-09, "loss": 1.6537, "step": 27807 }, { "epoch": 0.9958636990348632, "grad_norm": 1.6680328845977783, "learning_rate": 8.89613390370192e-09, "loss": 1.658, "step": 27808 }, { "epoch": 0.9958995111644314, "grad_norm": 1.6057300567626953, "learning_rate": 8.742093451463707e-09, "loss": 1.3095, "step": 27809 }, { "epoch": 0.9959353232939997, "grad_norm": 1.358614444732666, "learning_rate": 8.589398252611957e-09, "loss": 1.3424, "step": 27810 }, { "epoch": 0.995971135423568, "grad_norm": 1.5714879035949707, "learning_rate": 8.43804830922279e-09, "loss": 1.2419, "step": 27811 }, { "epoch": 0.9960069475531362, "grad_norm": 1.897676706314087, "learning_rate": 8.28804362331681e-09, "loss": 1.6724, "step": 27812 }, { "epoch": 0.9960427596827045, "grad_norm": 1.658821940422058, "learning_rate": 8.139384196903522e-09, "loss": 1.2458, "step": 27813 }, { "epoch": 0.9960785718122728, "grad_norm": 1.9782476425170898, "learning_rate": 7.992070032003529e-09, "loss": 1.3115, "step": 27814 }, { "epoch": 0.9961143839418412, "grad_norm": 1.8454557657241821, "learning_rate": 7.84610113059303e-09, "loss": 1.4479, "step": 27815 }, { "epoch": 0.9961501960714094, "grad_norm": 1.320586085319519, "learning_rate": 7.70147749462602e-09, "loss": 1.4129, "step": 27816 }, { "epoch": 0.9961860082009777, "grad_norm": 1.5092272758483887, "learning_rate": 7.558199126056487e-09, "loss": 1.5169, "step": 27817 }, { "epoch": 0.996221820330546, "grad_norm": 1.7253291606903076, "learning_rate": 7.416266026816221e-09, "loss": 1.6159, "step": 27818 }, { "epoch": 0.9962576324601142, "grad_norm": 1.819915771484375, "learning_rate": 7.275678198803703e-09, "loss": 1.2604, "step": 27819 }, { "epoch": 0.9962934445896825, "grad_norm": 1.2910900115966797, "learning_rate": 7.136435643917416e-09, "loss": 1.3797, "step": 27820 }, { "epoch": 0.9963292567192508, "grad_norm": 1.5493773221969604, "learning_rate": 6.998538364022533e-09, "loss": 1.6762, "step": 27821 }, { "epoch": 0.9963650688488191, "grad_norm": 1.7264066934585571, "learning_rate": 6.861986360995332e-09, "loss": 1.2949, "step": 27822 }, { "epoch": 0.9964008809783874, "grad_norm": 2.5256896018981934, "learning_rate": 6.726779636645475e-09, "loss": 1.688, "step": 27823 }, { "epoch": 0.9964366931079557, "grad_norm": 1.2364569902420044, "learning_rate": 6.592918192804831e-09, "loss": 1.2282, "step": 27824 }, { "epoch": 0.996472505237524, "grad_norm": 1.384142518043518, "learning_rate": 6.460402031283064e-09, "loss": 1.3266, "step": 27825 }, { "epoch": 0.9965083173670922, "grad_norm": 1.398099660873413, "learning_rate": 6.329231153845427e-09, "loss": 1.0723, "step": 27826 }, { "epoch": 0.9965441294966605, "grad_norm": 2.250969886779785, "learning_rate": 6.199405562268279e-09, "loss": 1.545, "step": 27827 }, { "epoch": 0.9965799416262288, "grad_norm": 1.5381693840026855, "learning_rate": 6.070925258294668e-09, "loss": 1.4875, "step": 27828 }, { "epoch": 0.9966157537557971, "grad_norm": 1.3616868257522583, "learning_rate": 5.943790243656544e-09, "loss": 1.3771, "step": 27829 }, { "epoch": 0.9966515658853654, "grad_norm": 1.404511570930481, "learning_rate": 5.818000520052547e-09, "loss": 1.3444, "step": 27830 }, { "epoch": 0.9966873780149337, "grad_norm": 1.3971214294433594, "learning_rate": 5.69355608919242e-09, "loss": 1.3699, "step": 27831 }, { "epoch": 0.996723190144502, "grad_norm": 1.6311243772506714, "learning_rate": 5.570456952741499e-09, "loss": 1.3255, "step": 27832 }, { "epoch": 0.9967590022740702, "grad_norm": 1.9845330715179443, "learning_rate": 5.448703112365117e-09, "loss": 1.5725, "step": 27833 }, { "epoch": 0.9967948144036385, "grad_norm": 1.6047090291976929, "learning_rate": 5.328294569673098e-09, "loss": 1.4876, "step": 27834 }, { "epoch": 0.9968306265332068, "grad_norm": 1.8364373445510864, "learning_rate": 5.209231326319674e-09, "loss": 1.3304, "step": 27835 }, { "epoch": 0.9968664386627751, "grad_norm": 1.2628228664398193, "learning_rate": 5.0915133838924656e-09, "loss": 1.21, "step": 27836 }, { "epoch": 0.9969022507923434, "grad_norm": 1.866013765335083, "learning_rate": 4.975140743967987e-09, "loss": 1.4851, "step": 27837 }, { "epoch": 0.9969380629219117, "grad_norm": 1.4663585424423218, "learning_rate": 4.860113408122757e-09, "loss": 1.7521, "step": 27838 }, { "epoch": 0.9969738750514799, "grad_norm": 1.7713894844055176, "learning_rate": 4.746431377899985e-09, "loss": 1.4532, "step": 27839 }, { "epoch": 0.9970096871810482, "grad_norm": 1.5363188982009888, "learning_rate": 4.634094654820675e-09, "loss": 1.2711, "step": 27840 }, { "epoch": 0.9970454993106165, "grad_norm": 1.7956669330596924, "learning_rate": 4.523103240416937e-09, "loss": 1.5147, "step": 27841 }, { "epoch": 0.9970813114401847, "grad_norm": 2.0350286960601807, "learning_rate": 4.413457136165367e-09, "loss": 1.5831, "step": 27842 }, { "epoch": 0.9971171235697531, "grad_norm": 1.6121965646743774, "learning_rate": 4.3051563435425605e-09, "loss": 1.3732, "step": 27843 }, { "epoch": 0.9971529356993214, "grad_norm": 1.6830921173095703, "learning_rate": 4.198200864014012e-09, "loss": 1.6102, "step": 27844 }, { "epoch": 0.9971887478288897, "grad_norm": 1.550490379333496, "learning_rate": 4.092590699011911e-09, "loss": 1.4481, "step": 27845 }, { "epoch": 0.9972245599584579, "grad_norm": 1.875200629234314, "learning_rate": 3.988325849957342e-09, "loss": 1.3372, "step": 27846 }, { "epoch": 0.9972603720880262, "grad_norm": 1.4787503480911255, "learning_rate": 3.885406318260288e-09, "loss": 1.4715, "step": 27847 }, { "epoch": 0.9972961842175945, "grad_norm": 1.6989432573318481, "learning_rate": 3.783832105286322e-09, "loss": 1.3271, "step": 27848 }, { "epoch": 0.9973319963471627, "grad_norm": 1.4826751947402954, "learning_rate": 3.6836032124232256e-09, "loss": 1.3739, "step": 27849 }, { "epoch": 0.9973678084767311, "grad_norm": 1.7975443601608276, "learning_rate": 3.5847196410143667e-09, "loss": 1.6923, "step": 27850 }, { "epoch": 0.9974036206062994, "grad_norm": 1.5432265996932983, "learning_rate": 3.4871813923809117e-09, "loss": 1.3351, "step": 27851 }, { "epoch": 0.9974394327358677, "grad_norm": 1.5838642120361328, "learning_rate": 3.390988467844025e-09, "loss": 1.579, "step": 27852 }, { "epoch": 0.9974752448654359, "grad_norm": 1.9075052738189697, "learning_rate": 3.2961408686915662e-09, "loss": 1.3764, "step": 27853 }, { "epoch": 0.9975110569950042, "grad_norm": 1.5979747772216797, "learning_rate": 3.2026385962113937e-09, "loss": 1.4625, "step": 27854 }, { "epoch": 0.9975468691245725, "grad_norm": 1.5051318407058716, "learning_rate": 3.110481651646957e-09, "loss": 1.3817, "step": 27855 }, { "epoch": 0.9975826812541407, "grad_norm": 2.1045610904693604, "learning_rate": 3.0196700362417065e-09, "loss": 1.4953, "step": 27856 }, { "epoch": 0.9976184933837091, "grad_norm": 1.5052860975265503, "learning_rate": 2.930203751227989e-09, "loss": 1.3494, "step": 27857 }, { "epoch": 0.9976543055132774, "grad_norm": 1.5225681066513062, "learning_rate": 2.8420827977937437e-09, "loss": 1.5778, "step": 27858 }, { "epoch": 0.9976901176428457, "grad_norm": 1.5422025918960571, "learning_rate": 2.7553071771380112e-09, "loss": 1.3814, "step": 27859 }, { "epoch": 0.9977259297724139, "grad_norm": 3.553717851638794, "learning_rate": 2.6698768904154236e-09, "loss": 1.4703, "step": 27860 }, { "epoch": 0.9977617419019822, "grad_norm": 1.6214388608932495, "learning_rate": 2.585791938791715e-09, "loss": 1.1495, "step": 27861 }, { "epoch": 0.9977975540315505, "grad_norm": 1.3043503761291504, "learning_rate": 2.5030523233771087e-09, "loss": 1.4878, "step": 27862 }, { "epoch": 0.9978333661611187, "grad_norm": 1.5366240739822388, "learning_rate": 2.4216580453040314e-09, "loss": 1.0653, "step": 27863 }, { "epoch": 0.9978691782906871, "grad_norm": 1.7023508548736572, "learning_rate": 2.3416091056605027e-09, "loss": 1.5414, "step": 27864 }, { "epoch": 0.9979049904202554, "grad_norm": 1.317268967628479, "learning_rate": 2.2629055055234384e-09, "loss": 1.4184, "step": 27865 }, { "epoch": 0.9979408025498236, "grad_norm": 1.655472755432129, "learning_rate": 2.18554724594755e-09, "loss": 1.6407, "step": 27866 }, { "epoch": 0.9979766146793919, "grad_norm": 1.6678001880645752, "learning_rate": 2.1095343279764477e-09, "loss": 1.4937, "step": 27867 }, { "epoch": 0.9980124268089602, "grad_norm": 1.3210474252700806, "learning_rate": 2.0348667526426392e-09, "loss": 1.3464, "step": 27868 }, { "epoch": 0.9980482389385285, "grad_norm": 1.340619444847107, "learning_rate": 1.961544520934222e-09, "loss": 1.1373, "step": 27869 }, { "epoch": 0.9980840510680967, "grad_norm": 2.0693037509918213, "learning_rate": 1.8895676338392952e-09, "loss": 1.6612, "step": 27870 }, { "epoch": 0.9981198631976651, "grad_norm": 1.7016184329986572, "learning_rate": 1.8189360923459575e-09, "loss": 1.3875, "step": 27871 }, { "epoch": 0.9981556753272334, "grad_norm": 1.4674826860427856, "learning_rate": 1.7496498973756936e-09, "loss": 1.4655, "step": 27872 }, { "epoch": 0.9981914874568016, "grad_norm": 1.7653722763061523, "learning_rate": 1.6817090498832954e-09, "loss": 1.4544, "step": 27873 }, { "epoch": 0.9982272995863699, "grad_norm": 1.617550015449524, "learning_rate": 1.615113550779146e-09, "loss": 1.4976, "step": 27874 }, { "epoch": 0.9982631117159382, "grad_norm": 1.6793699264526367, "learning_rate": 1.5498634009514235e-09, "loss": 1.6882, "step": 27875 }, { "epoch": 0.9982989238455064, "grad_norm": 1.7984838485717773, "learning_rate": 1.4859586012772042e-09, "loss": 1.6261, "step": 27876 }, { "epoch": 0.9983347359750747, "grad_norm": 1.3389443159103394, "learning_rate": 1.4233991526224622e-09, "loss": 1.2497, "step": 27877 }, { "epoch": 0.9983705481046431, "grad_norm": 1.4950339794158936, "learning_rate": 1.3621850558309668e-09, "loss": 1.4324, "step": 27878 }, { "epoch": 0.9984063602342114, "grad_norm": 1.9988292455673218, "learning_rate": 1.3023163117242832e-09, "loss": 1.3851, "step": 27879 }, { "epoch": 0.9984421723637796, "grad_norm": 1.5393435955047607, "learning_rate": 1.2437929211017718e-09, "loss": 1.4378, "step": 27880 }, { "epoch": 0.9984779844933479, "grad_norm": 1.34359872341156, "learning_rate": 1.1866148847516912e-09, "loss": 1.4509, "step": 27881 }, { "epoch": 0.9985137966229162, "grad_norm": 1.7370821237564087, "learning_rate": 1.1307822034511973e-09, "loss": 1.3819, "step": 27882 }, { "epoch": 0.9985496087524844, "grad_norm": 2.0988402366638184, "learning_rate": 1.0762948779441396e-09, "loss": 1.3734, "step": 27883 }, { "epoch": 0.9985854208820527, "grad_norm": 1.5026785135269165, "learning_rate": 1.0231529089632652e-09, "loss": 1.4251, "step": 27884 }, { "epoch": 0.9986212330116211, "grad_norm": 2.722470283508301, "learning_rate": 9.713562972302193e-10, "loss": 1.5023, "step": 27885 }, { "epoch": 0.9986570451411894, "grad_norm": 1.7294663190841675, "learning_rate": 9.209050434333399e-10, "loss": 1.3209, "step": 27886 }, { "epoch": 0.9986928572707576, "grad_norm": 2.4848201274871826, "learning_rate": 8.717991482609655e-10, "loss": 1.7707, "step": 27887 }, { "epoch": 0.9987286694003259, "grad_norm": 1.535007357597351, "learning_rate": 8.240386123681276e-10, "loss": 1.4537, "step": 27888 }, { "epoch": 0.9987644815298942, "grad_norm": 1.3857157230377197, "learning_rate": 7.776234363987555e-10, "loss": 1.79, "step": 27889 }, { "epoch": 0.9988002936594624, "grad_norm": 1.4100427627563477, "learning_rate": 7.325536209856765e-10, "loss": 1.578, "step": 27890 }, { "epoch": 0.9988361057890307, "grad_norm": 1.3705559968948364, "learning_rate": 6.888291667173085e-10, "loss": 1.3029, "step": 27891 }, { "epoch": 0.9988719179185991, "grad_norm": 1.7069154977798462, "learning_rate": 6.464500741820701e-10, "loss": 1.4618, "step": 27892 }, { "epoch": 0.9989077300481674, "grad_norm": 2.2990357875823975, "learning_rate": 6.054163439683791e-10, "loss": 1.4391, "step": 27893 }, { "epoch": 0.9989435421777356, "grad_norm": 1.4286106824874878, "learning_rate": 5.65727976620245e-10, "loss": 1.5029, "step": 27894 }, { "epoch": 0.9989793543073039, "grad_norm": 1.4237838983535767, "learning_rate": 5.273849726705748e-10, "loss": 1.4531, "step": 27895 }, { "epoch": 0.9990151664368722, "grad_norm": 1.7203515768051147, "learning_rate": 4.903873326300712e-10, "loss": 1.3107, "step": 27896 }, { "epoch": 0.9990509785664404, "grad_norm": 1.3520474433898926, "learning_rate": 4.5473505700943664e-10, "loss": 1.3599, "step": 27897 }, { "epoch": 0.9990867906960087, "grad_norm": 1.73139488697052, "learning_rate": 4.2042814627496483e-10, "loss": 1.4747, "step": 27898 }, { "epoch": 0.9991226028255771, "grad_norm": 1.3812834024429321, "learning_rate": 3.874666008929495e-10, "loss": 1.3831, "step": 27899 }, { "epoch": 0.9991584149551453, "grad_norm": 1.7007373571395874, "learning_rate": 3.558504213074798e-10, "loss": 1.186, "step": 27900 }, { "epoch": 0.9991942270847136, "grad_norm": 2.0018959045410156, "learning_rate": 3.255796079404405e-10, "loss": 1.5158, "step": 27901 }, { "epoch": 0.9992300392142819, "grad_norm": 1.486700177192688, "learning_rate": 2.9665416120261414e-10, "loss": 1.6186, "step": 27902 }, { "epoch": 0.9992658513438502, "grad_norm": 2.046440601348877, "learning_rate": 2.69074081493681e-10, "loss": 1.1622, "step": 27903 }, { "epoch": 0.9993016634734184, "grad_norm": 1.8606266975402832, "learning_rate": 2.4283936915781014e-10, "loss": 1.4256, "step": 27904 }, { "epoch": 0.9993374756029867, "grad_norm": 1.6825835704803467, "learning_rate": 2.1795002457247748e-10, "loss": 1.3446, "step": 27905 }, { "epoch": 0.9993732877325551, "grad_norm": 1.6700379848480225, "learning_rate": 1.9440604807074992e-10, "loss": 1.4342, "step": 27906 }, { "epoch": 0.9994090998621233, "grad_norm": 1.4203354120254517, "learning_rate": 1.7220743995238763e-10, "loss": 1.5311, "step": 27907 }, { "epoch": 0.9994449119916916, "grad_norm": 1.6370415687561035, "learning_rate": 1.5135420052825312e-10, "loss": 1.5626, "step": 27908 }, { "epoch": 0.9994807241212599, "grad_norm": 1.8386884927749634, "learning_rate": 1.318463300870043e-10, "loss": 1.1738, "step": 27909 }, { "epoch": 0.9995165362508281, "grad_norm": 2.393831253051758, "learning_rate": 1.136838288728903e-10, "loss": 1.4101, "step": 27910 }, { "epoch": 0.9995523483803964, "grad_norm": 1.738539695739746, "learning_rate": 9.68666971412624e-11, "loss": 1.7001, "step": 27911 }, { "epoch": 0.9995881605099647, "grad_norm": 1.6437656879425049, "learning_rate": 8.139493511416518e-11, "loss": 1.5699, "step": 27912 }, { "epoch": 0.9996239726395331, "grad_norm": 1.6513583660125732, "learning_rate": 6.726854300254104e-11, "loss": 1.7211, "step": 27913 }, { "epoch": 0.9996597847691013, "grad_norm": 1.3128859996795654, "learning_rate": 5.448752098402565e-11, "loss": 1.0227, "step": 27914 }, { "epoch": 0.9996955968986696, "grad_norm": 1.9902230501174927, "learning_rate": 4.3051869258459163e-11, "loss": 1.5862, "step": 27915 }, { "epoch": 0.9997314090282379, "grad_norm": 2.3277649879455566, "learning_rate": 3.296158795906834e-11, "loss": 1.4745, "step": 27916 }, { "epoch": 0.9997672211578061, "grad_norm": 1.625291109085083, "learning_rate": 2.421667721907994e-11, "loss": 1.3652, "step": 27917 }, { "epoch": 0.9998030332873744, "grad_norm": 1.4065899848937988, "learning_rate": 1.68171371606185e-11, "loss": 1.5924, "step": 27918 }, { "epoch": 0.9998388454169427, "grad_norm": 1.4588338136672974, "learning_rate": 1.0762967894706321e-11, "loss": 1.3983, "step": 27919 }, { "epoch": 0.9998746575465111, "grad_norm": 1.98700749874115, "learning_rate": 6.054169487956784e-12, "loss": 1.338, "step": 27920 }, { "epoch": 0.9999104696760793, "grad_norm": 1.688843846321106, "learning_rate": 2.690742006983271e-12, "loss": 1.1956, "step": 27921 }, { "epoch": 0.9999462818056476, "grad_norm": 1.4015154838562012, "learning_rate": 6.726855072969329e-13, "loss": 1.3722, "step": 27922 }, { "epoch": 0.9999820939352159, "grad_norm": 1.3305832147598267, "learning_rate": 0.0, "loss": 1.4175, "step": 27923 }, { "epoch": 0.9999820939352159, "step": 27923, "total_flos": 2.3643443035111424e+18, "train_loss": 1.5265222442264104, "train_runtime": 42480.1433, "train_samples_per_second": 5.259, "train_steps_per_second": 0.657 } ], "logging_steps": 1.0, "max_steps": 27923, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.3643443035111424e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }